/*
 * Computes the staple :
 *
 *                mu
 *             +-------+
 *       nu    |       |
 *             |       |
 *             X       X
 *
 * where the mu link ("link") may be any color-matrix field and the side
 * links come from gauge[nu].  Two staples are formed per call:
 *
 *   upper:  gauge[nu](x) * link(x+nu) * adj(gauge[nu](x+mu))
 *   lower:  adj(gauge[nu](x-nu)) * link(x-nu) * gauge[nu](x-nu+mu)
 *
 * staple  receives upper+lower; pass NULL if the sum need not be kept
 * mu, nu  direction of the link being fattened / direction of the sides
 * link    field used for the mu link
 * dcoef   weight applied when accumulating into fl[mu]
 * gauge   gauge links, indexed by direction
 * fl      fat links; fl[mu] is incremented by dcoef*(upper+lower)
 */
static void
compute_gen_staple(QDP_ColorMatrix *staple, int mu, int nu,
                   QDP_ColorMatrix *link, double dcoef,
                   QDP_ColorMatrix *gauge[], QDP_ColorMatrix *fl[])
{
  const int keep_staple = (staple != NULL);
  QLA_Real weight = dcoef;

  QDP_ColorMatrix *shift_a = QDP_create_M();
  QDP_ColorMatrix *shift_b = QDP_create_M();
  QDP_ColorMatrix *prod    = QDP_create_M();
  QDP_ColorMatrix *upper   = QDP_create_M();
  QDP_ColorMatrix *lower   = QDP_create_M();

  /* ---- upper staple ---- */
  /* shift_a = link(x+nu), shift_b = gauge[nu](x+mu) */
  QDP_M_eq_sM(shift_a, link, QDP_neighbor[nu], QDP_forward, QDP_all);
  QDP_M_eq_sM(shift_b, gauge[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
  /* prod = link(x+nu) * adj(gauge[nu](x+mu)); needed whether or not we keep it */
  QDP_M_eq_M_times_Ma(prod, shift_a, shift_b, QDP_all);
  if (keep_staple) {
    /* Keep it: fl[mu] is updated once, after the lower staple is added */
    QDP_M_eq_M_times_M(staple, gauge[nu], prod, QDP_all);
  } else {
    /* Not kept: fold the upper staple into the fat link right away */
    QDP_M_eq_M_times_M(upper, gauge[nu], prod, QDP_all);
    QDP_M_peq_r_times_M(fl[mu], &weight, upper, QDP_all);
  }

  /* ---- lower staple ---- */
  /* Build adj(gauge[nu](x)) * link(x) * gauge[nu](x+mu) at x, then pull it
     from the backward nu neighbour so it lands on the site it belongs to. */
  QDP_M_eq_sM(shift_a, gauge[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
  QDP_M_eq_Ma_times_M(prod, gauge[nu], link, QDP_all);
  QDP_M_eq_M_times_M(lower, prod, shift_a, QDP_all);
  QDP_M_eq_sM(shift_a, lower, QDP_neighbor[nu], QDP_backward, QDP_all);
  if (keep_staple) {
    QDP_M_peq_M(staple, shift_a, QDP_all);
    QDP_M_peq_r_times_M(fl[mu], &weight, staple, QDP_all);
  } else {
    QDP_M_peq_r_times_M(fl[mu], &weight, shift_a, QDP_all);
  }

  QDP_destroy_M(shift_a);
  QDP_destroy_M(shift_b);
  QDP_destroy_M(prod);
  QDP_destroy_M(upper);
  QDP_destroy_M(lower);
} /* compute_gen_staple */
// topdir = 1..nd // sidedir = -nd..nd // toplinknum,sidelinknum = 0..nin-1 void QOP_staples(QOP_info_t *info, int nout, int nin, QDP_ColorMatrix *out[], QDP_ColorMatrix *in[], int nstaples[], int *topdir[], int *sidedir[], int *toplinknum[], int *sidelinknum[], QLA_Real *coef[]) { #define NC QDP_get_nc(in[0]) double dtime = QOP_time(); double nflops = 0; int nd = QDP_ndim(); QDP_ColorMatrix *ftmps[nin][nd], *t1, *t2, *bt2[nd]; for(int i=0; i<nin; i++) for(int j=0; j<nd; j++) ftmps[i][j] = NULL; for(int i=0; i<nd; i++) bt2[i] = NULL; t1 = QDP_create_M(); t2 = QDP_create_M(); for(int io=0; io<nout; io++) { //QOP_printf0("%i: ns: %i\n", io, nstaples[io]); for(int s=0; s<nstaples[io]; s++) { QLA_Real c = coef[io][s]; int tn = toplinknum[io][s]; int sdir = sidedir[io][s]; //QOP_printf0(" %i: sdir: %i c: %g\n", s, sdir, c); if(sdir==0) { if(c==1) { QDP_M_peq_M(out[io], in[tn], QDP_all); nflops += PEQM; } else { QDP_M_peq_r_times_M(out[io], &c, in[tn], QDP_all); nflops += 2*PEQM; } } else if(sdir>0) { int nu = sdir-1; int mu = topdir[io][s]-1; int sn = sidelinknum[io][s]; QDP_ColorMatrix *Umunu = getU(tn, mu, nu); QDP_ColorMatrix *Unumu = getU(sn, nu, mu); QDP_M_eq_M_times_M(t1, in[sn], Umunu, QDP_all); if(c==1) { QDP_M_peq_M_times_Ma(out[io], t1, Unumu, QDP_all); nflops += EQMTM+PEQMTM; } else { QDP_M_eq_M_times_Ma(t2, t1, Unumu, QDP_all); QDP_M_peq_r_times_M(out[io], &c, t2, QDP_all); nflops += 2*EQMTM+2*PEQM; } } else { int nu = -sdir-1; int mu = topdir[io][s]-1; int sn = sidelinknum[io][s]; QDP_ColorMatrix *Unumu = getU(sn, nu, mu); QDP_M_eq_M_times_M(t1, in[tn], Unumu, QDP_all); QDP_M_eq_Ma_times_M(t2, in[sn], t1, QDP_all); QDP_ColorMatrix *tb = shiftb(t2, nu); if(c==1) { QDP_M_peq_M(out[io], tb, QDP_all); nflops += 2*EQMTM+PEQM; } else { QDP_M_peq_r_times_M(out[io], &c, tb, QDP_all); nflops += 2*EQMTM+2*PEQM; } QDP_discard_M(tb); } } } for(int i=0; i<nin; i++) for(int j=0; j<nd; j++) if(ftmps[i][j]!=NULL) QDP_destroy_M(ftmps[i][j]); for(int i=0; i<nd; i++) 
if(bt2[i]!=NULL) QDP_destroy_M(bt2[i]); QDP_destroy_M(t1); QDP_destroy_M(t2); info->final_sec = QOP_time() - dtime; info->final_flop = nflops*QDP_sites_on_node; info->status = QOP_SUCCESS; #undef NC }
// topdir = 1..nd // sidedir = -nd..nd // toplinknum,sidelinknum = 0..nin-1 void QOP_staples_deriv(QOP_info_t *info, int nout, int nin, QDP_ColorMatrix *deriv[], QDP_ColorMatrix *chain[], QDP_ColorMatrix *in[], int nstaples[], int *topdir[], int *sidedir[], int *toplinknum[], int *sidelinknum[], QLA_Real *coef[]) { #define NC QDP_get_nc(in[0]) double dtime = QOP_time(); double nflops = 0; int nd = QDP_ndim(); QDP_ColorMatrix *ftmps[nin][nd], *t1, *t2, *t3, *t4, *tc, *bt2[nd], *bt3[nd], *ctmps[nd]; int ctn[nd]; for(int i=0; i<nin; i++) for(int j=0; j<nd; j++) ftmps[i][j] = NULL; for(int i=0; i<nd; i++) bt2[i] = bt3[i] = ctmps[i] = NULL; t1 = QDP_create_M(); t2 = QDP_create_M(); t3 = QDP_create_M(); t4 = QDP_create_M(); tc = QDP_create_M(); // process in reverse in case calculated staples used as input for others for(int io=nout-1; io>=0; io--) { for(int i=0; i<nd; i++) { if(ctmps[i]) QDP_discard_M(ctmps[i]); ctn[i] = 0; } QDP_M_eq_M(tc, chain[io], QDP_all); for(int s=0; s<nstaples[io]; s++) { QLA_Real c = coef[io][s]; int tn = toplinknum[io][s]; int sdir = sidedir[io][s]; //QOP_printf0("io: %i s: %i sdir: %i tn: %i c: %g\n", io, s, sdir, tn, c); if(sdir==0) { if(c==1) { QDP_M_peq_M(deriv[tn], tc, QDP_all); nflops += PEQM; } else { QDP_M_peq_r_times_M(deriv[tn], &c, tc, QDP_all); nflops += 2*PEQM; } } else if(sdir>0) { int nu = sdir-1; int mu = topdir[io][s]-1; int sn = sidelinknum[io][s]; //QOP_printf0(" mu: %i nu: %i sn: %i\n", mu, nu, sn); QDP_ColorMatrix *Umunu = getU(tn, mu, nu); QDP_ColorMatrix *Unumu = getU(sn, nu, mu); QDP_M_eq_M_times_M(t1, in[sn], Umunu, QDP_all); QDP_M_eq_Ma_times_M(t2, tc, t1, QDP_all); QDP_ColorMatrix *tb2 = shiftb(t2, mu); QDP_M_eq_M_times_M(t1, tc, Unumu, QDP_all); QDP_M_eq_Ma_times_M(t3, in[sn], t1, QDP_all); QDP_ColorMatrix *tb3 = shiftb(t3, nu); if(c==1) { QDP_M_peq_M_times_Ma(deriv[sn], t1, Umunu, QDP_all); QDP_M_peq_M(deriv[sn], tb2, QDP_all); QDP_M_peq_M(deriv[tn], tb3, QDP_all); nflops += 4*EQMTM+PEQMTM+2*PEQM; } else { 
QDP_M_eq_M_times_Ma(t4, t1, Umunu, QDP_all); QDP_M_peq_r_times_M(deriv[sn], &c, t4, QDP_all); QDP_M_peq_r_times_M(deriv[sn], &c, tb2, QDP_all); QDP_M_peq_r_times_M(deriv[tn], &c, tb3, QDP_all); nflops += 5*EQMTM+6*PEQM; } QDP_discard_M(tb2); QDP_discard_M(tb3); } else { int nu = -sdir-1; int mu = topdir[io][s]-1; int sn = sidelinknum[io][s]; QDP_ColorMatrix *Cmunu = getC(nu); QDP_ColorMatrix *Unumu = getU(sn, nu, mu); QDP_M_eq_M_times_M(t1, in[sn], Cmunu, QDP_all); QDP_M_eq_Ma_times_M(t2, in[tn], t1, QDP_all); QDP_ColorMatrix *tb2 = shiftb(t2, mu); QDP_M_eq_M_times_M(t3, in[tn], Unumu, QDP_all); if(c==1) { QDP_M_peq_M_times_Ma(deriv[tn], t1, Unumu, QDP_all); QDP_M_peq_M_times_Ma(deriv[sn], t3, Cmunu, QDP_all); QDP_M_peq_M(deriv[sn], tb2, QDP_all); nflops += 3*EQMTM+2*PEQMTM+PEQM; } else { QDP_M_eq_M_times_Ma(t4, t1, Unumu, QDP_all); QDP_M_peq_r_times_M(deriv[tn], &c, t4, QDP_all); QDP_M_eq_M_times_Ma(t4, t3, Cmunu, QDP_all); QDP_M_peq_r_times_M(deriv[sn], &c, t4, QDP_all); QDP_M_peq_r_times_M(deriv[sn], &c, tb2, QDP_all); nflops += 5*EQMTM+6*PEQM; } QDP_discard_M(tb2); } } } for(int i=0; i<nin; i++) for(int j=0; j<nd; j++) if(ftmps[i][j]!=NULL) QDP_destroy_M(ftmps[i][j]); for(int i=0; i<nd; i++) { if(bt2[i]!=NULL) QDP_destroy_M(bt2[i]); if(bt3[i]!=NULL) QDP_destroy_M(bt3[i]); if(ctmps[i]!=NULL) QDP_destroy_M(ctmps[i]); } QDP_destroy_M(t1); QDP_destroy_M(t2); QDP_destroy_M(t3); QDP_destroy_M(t4); QDP_destroy_M(tc); info->final_sec = QOP_time() - dtime; info->final_flop = nflops*QDP_sites_on_node; info->status = QOP_SUCCESS; #undef NC }
void QOPPC(symanzik_1loop_gauge_force1) (QOP_info_t *info, QOP_GaugeField *gauge, QOP_Force *force, QOP_gauge_coeffs_t *coeffs, REAL eps) { REAL Plaq, Rect, Pgm ; QDP_ColorMatrix *tempmom_qdp[4]; QDP_ColorMatrix *Amu[6]; // products of 2 links Unu(x)*Umu(x+nu) QDP_ColorMatrix *tmpmat; QDP_ColorMatrix *tmpmat1; QDP_ColorMatrix *tmpmat2; QDP_ColorMatrix *staples; QDP_ColorMatrix *tmpmat3; QDP_ColorMatrix *tmpmat4; int i, k; int mu, nu, sig; double dtime; //REAL eb3 = -eps*beta/3.0; REAL eb3 = -eps/3.0; int j[3][2] = {{1,2}, {0,2}, {0,1}}; // QOP_printf0("beta: %e, eb3: %e\n", beta, eb3); dtime = -QOP_time(); for(mu=0; mu<4; mu++) { tempmom_qdp[mu] = QDP_create_M(); QDP_M_eq_zero(tempmom_qdp[mu], QDP_all); } tmpmat = QDP_create_M(); for(i=0; i<QOP_common.ndim; i++) { fblink[i] = gauge->links[i]; fblink[OPP_DIR(i)] = QDP_create_M(); QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all); QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all); } for(i=0; i<6; i++) { Amu[i] = QDP_create_M(); } staples = QDP_create_M(); tmpmat1 = QDP_create_M(); tmpmat2 = QDP_create_M(); tmpmat3 = QDP_create_M(); tmpmat4 = QDP_create_M(); Plaq = coeffs->plaquette; Rect = coeffs->rectangle; Pgm = coeffs->parallelogram; //Construct 3-staples and rectangles for(mu=0; mu<4; mu++) { i=0; for(nu=0; nu<4; nu++) { if(nu!=mu){ // tmpmat1 = Umu(x+nu) QDP_M_eq_sM(tmpmat1, fblink[mu], QDP_neighbor[nu], QDP_forward, QDP_all); QDP_M_eq_M_times_M(Amu[i], fblink[nu], tmpmat1, QDP_all); //tmpmat2 = Umu(x-nu) QDP_M_eq_sM(tmpmat2, fblink[mu], QDP_neighbor[nu], QDP_backward, QDP_all); QDP_M_eq_M_times_M(Amu[i+3], fblink[OPP_DIR(nu)], tmpmat2, QDP_all); //tmpmat = U_{nu}(x+mu) QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_M_times_Ma(staples, Amu[i], tmpmat, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[mu], &Plaq, staples, QDP_all); //tmpmat = U_{-nu}(x+mu) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_Ma_times_M(tmpmat3, 
fblink[OPP_DIR(nu)], staples, QDP_all); QDP_M_eq_M_times_M(tmpmat4, tmpmat3, tmpmat, QDP_all); QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_forward, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[mu], &Rect, tmpmat, QDP_all); QDP_M_eq_Ma_times_M(tmpmat4, tmpmat2, tmpmat3, QDP_all); QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_forward, QDP_all); QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[mu], QDP_backward, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat3, QDP_all); //tmpmat = U_{-nu}(x+mu) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_M_times_Ma(tmpmat3, tmpmat2, tmpmat, QDP_all); QDP_M_eq_M_times_Ma(tmpmat, tmpmat3, staples, QDP_all); QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[nu], QDP_forward, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat3, QDP_all); //tmpmat = U_{-nu}(x+mu) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_M_times_Ma(staples, Amu[i+3], tmpmat, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[mu], &Plaq, staples, QDP_all); QDP_M_eq_Ma_times_M(tmpmat3, fblink[nu], staples, QDP_all); QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_M_times_M(tmpmat4, tmpmat3, tmpmat, QDP_all); QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_backward, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[mu], &Rect, tmpmat, QDP_all); QDP_M_eq_Ma_times_M(tmpmat, tmpmat3, tmpmat1, QDP_all); QDP_M_eq_sM(tmpmat4, tmpmat, QDP_neighbor[mu], QDP_backward, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat4, QDP_all); QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_M_times_M(tmpmat3, staples, tmpmat, QDP_all); QDP_M_eq_M_times_Ma(tmpmat4, tmpmat3, tmpmat1, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat4, QDP_all); i++; } } // Construct the pgm staples and add them to force QDP_M_eq_zero(staples, QDP_all); i=0; for(nu=0; nu<4; nu++){ if(nu!=mu){ k=0; for(sig=0; sig<4;sig ++){ 
if(sig!=mu && nu!=sig){ // the nu_sig_mu ... staple and 3 reflections //tmpmat = Amu["sig"](x+nu) QDP_M_eq_sM(tmpmat, Amu[j[i][k]], QDP_neighbor[nu], QDP_forward, QDP_all); //tmpmat1 = Unu(x)*Amu["sig"](x+nu) QDP_M_eq_M_times_M(tmpmat1, fblink[nu], tmpmat, QDP_all); //tmpmat3 = Unu(x+mu+sig) QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_forward, QDP_all); // HERE? //tmpmat2 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig)) QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all); //tmpmat = Usig(x+mu) QDP_M_eq_sM(tmpmat, fblink[sig], QDP_neighbor[mu], QDP_forward, QDP_all); //tmpmat1 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig))*adj(Usig(x+mu)) QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all); QDP_M_peq_M(staples, tmpmat1, QDP_all); //tmpmat = Amu["sig"](x-nu) QDP_M_eq_sM(tmpmat, Amu[j[i][k]], QDP_neighbor[nu], QDP_backward, QDP_all); //tmpmat1 = U_{-nu}(x)*Amu["sig"](x-nu) QDP_M_eq_M_times_M(tmpmat1, fblink[OPP_DIR(nu)], tmpmat, QDP_all); //tmpmat3 = U_{-nu}(x+mu+sig) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_forward, QDP_all); // HERE? 
//tmpmat2 = U_{-nu}nu(x)*Amu["sig"](x-nu)*adj(Unu(x+mu+sig)) QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all); //tmpmat = Usig(x+mu) QDP_M_eq_sM(tmpmat, fblink[sig], QDP_neighbor[mu], QDP_forward, QDP_all); //tmpmat1 = U_{-nu}(x)*Amu["sig"](x-nu)*adj(Unu(x+mu+sig))*adj(Usig(x+mu)) QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all); QDP_M_peq_M(staples, tmpmat1, QDP_all); //tmpmat = Amu["-sig"](x-nu) QDP_M_eq_sM(tmpmat, Amu[j[i][k]+3], QDP_neighbor[nu], QDP_backward, QDP_all); //tmpmat1 = U_{-nu}(x)*Amu["-sig"](x-nu) QDP_M_eq_M_times_M(tmpmat1, fblink[OPP_DIR(nu)], tmpmat, QDP_all); //tmpmat = U_{-nu}(x+mu-sig) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_backward, QDP_all); // HERE? //tmpmat2 = U_{-nu}nu(x)*Amu["-sig"](x-nu)*adj(Unu(x+mu-sig)) QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all); //tmpmat = U_{-sig}(x+mu) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(sig)], QDP_neighbor[mu], QDP_forward, QDP_all); //tmpmat1 = U_{-nu}(x)*Amu["-sig"](x-nu)*adj(Unu(x+mu-sig))*adj(U_{-sig}(x+mu)) QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all); QDP_M_peq_M(staples, tmpmat1, QDP_all); //tmpmat = Amu["-sig"](x+nu) QDP_M_eq_sM(tmpmat, Amu[j[i][k]+3], QDP_neighbor[nu], QDP_forward, QDP_all); //tmpmat1 = Unu(x)*Amu["-sig"](x+nu) QDP_M_eq_M_times_M(tmpmat1, fblink[nu], tmpmat, QDP_all); //tmpmat3 = Unu(x+mu-sig) QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_backward, QDP_all); // HERE? //tmpmat2 = Unu(x)*Amu["-sig"](x+nu)*adj(Unu(x+mu-sig)) QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all); //tmpmat = U_{-sig}(x+mu) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(sig)], QDP_neighbor[mu], QDP_forward, QDP_all); //tmpmat1 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig))*adj(Usig(x+mu)) QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all); QDP_M_peq_M(staples, tmpmat1, QDP_all); k++; }//close if sig!=nu ... 
}//close sig loop i++; }// close if nu!=mu }//close the pgm nu loop QDP_M_peq_r_times_M(tempmom_qdp[mu], &Pgm, staples, QDP_all); }// closes the mu loop #ifdef CHKSUM QLA_ColorMatrix qcm; QLA_Complex det, chk; QLA_c_eq_r(chk, 0); #endif for(mu=0; mu<4; mu++){ QDP_M_eq_M_times_Ma(tmpmat, fblink[mu], tempmom_qdp[mu], QDP_all); // HERE? QDP_M_eq_r_times_M_plus_M( tempmom_qdp[mu], &eb3, tmpmat, force->force[mu], QDP_all);// HERE? QDP_M_eq_antiherm_M(force->force[mu], tempmom_qdp[mu], QDP_all);// HERE #ifdef CHKSUM QDP_m_eq_sum_M(&qcm, force->force[mu], QDP_all); QLA_C_eq_det_M(&det, &qcm); QLA_c_peq_c(chk, det); #endif } #ifdef CHKSUM QOP_printf0("chksum: %g %g\n", QLA_real(chk), QLA_imag(chk)); #endif //DESTROY various fields QDP_destroy_M(tmpmat); QDP_destroy_M(tmpmat1); QDP_destroy_M(tmpmat2); QDP_destroy_M(tmpmat3); QDP_destroy_M(staples); QDP_destroy_M(tmpmat4); for(mu=0; mu<4; mu++){ QDP_destroy_M(tempmom_qdp[mu]); } for(i=0; i<6; i++) { QDP_destroy_M(Amu[i]); } for(i=4; i<8; i++) { QDP_destroy_M(fblink[i]); } dtime += QOP_time(); double nflop = 96720; info->final_sec = dtime; info->final_flop = nflop*QDP_sites_on_node; info->status = QOP_SUCCESS; //QOP_printf0("Time in slow g_force: %e\n", info->final_sec); }
/*
 * Smearing level 0.
 *
 * For every direction dir in XUP..TUP this builds the weighted outer
 * product
 *
 *     sum_term residues[term] * x[term] * adj( x[term] shifted )
 *
 * twice: once with the one-link shift (result in force_accum[dir]) and once
 * with the three-link (Naik) shift (result in force_accum_naik[dir]).  The
 * summed outer product is passed through link_gather_connection_qdp()
 * before being stored.
 *
 * info              only info->final_flop is written (flops on this node)
 * residues, x       nterms coefficients and the matching color vectors
 * nterms            number of terms; with 0 terms both outputs are set to
 *                   zero and the flop count is 0
 * force_accum       one-link outputs, overwritten
 * force_accum_naik  Naik outputs, overwritten
 *
 * Changes relative to the previous version:
 *  - nterms==0 used to return without touching the outputs or
 *    info->final_flop, so the caller went on to use stale fields and to add
 *    a stale flop count; the empty sum now yields zero for both.
 *  - a scratch matrix and all but two of the MAX_PATH_LENGTH+1 path
 *    matrices were allocated but never used; they are no longer created.
 *  - the one-link and Naik passes ran the same code twice; they now share
 *    one loop body (same call order as before).
 */
static void
QOP_hisq_force_multi_smearing0_fnmat(QOP_info_t *info,
                                     REAL *residues,
                                     QDP_ColorVector *x[],
                                     int nterms,
                                     QDP_ColorMatrix *force_accum[4],
                                     QDP_ColorMatrix *force_accum_naik[4])
{
  int dir;
  size_t nflops = 0;

  if( nterms==0 ) {
    /* Nothing to sum: report the empty result explicitly. */
    for(dir=XUP; dir<=TUP; dir++) {
      QDP_M_eq_zero(force_accum[dir], QDP_all);
      QDP_M_eq_zero(force_accum_naik[dir], QDP_all);
    }
    info->final_flop = 0;
    return;
  }

  QDP_ColorMatrix *tmat = QDP_create_M();     /* scratch / single outer product */
  QDP_ColorMatrix *oprod = QDP_create_M();    /* running weighted sum           */
  QDP_ColorMatrix *gathered = QDP_create_M(); /* oprod after the link gather    */
  QDP_ColorVector *tsrc[2], *vec_tmp[2];
  tsrc[0] = QDP_create_V();
  tsrc[1] = QDP_create_V();
  vec_tmp[0] = QDP_create_V();
  vec_tmp[1] = QDP_create_V();

  /* pass 0: one-link part, pass 1: Naik part */
  for(int naik=0; naik<2; naik++) {
    QDP_ColorMatrix **accum = naik ? force_accum_naik : force_accum;

    /* clear force accumulators */
    for(dir=XUP; dir<=TUP; dir++) {
      QDP_M_eq_zero(accum[dir], QDP_all);
    }

    for(dir=XUP; dir<=TUP; dir++) { /* loop on directions, path table is not needed */
      int backdir   = naik ? OPP_3_DIR( DIR3(dir) ) : OPP_DIR(dir);
      int gatherdir = naik ? DIR3(dir) : dir;
      int k = 0; /* which of the two vector buffers holds the current term */

      QDP_V_eq_V(tsrc[k], x[0], QDP_all);
      QDP_V_eq_sV(vec_tmp[k], tsrc[k], fnshift(backdir), fndir(backdir), QDP_all);
      QDP_M_eq_zero(oprod, QDP_all);

      for(int term=0; term<nterms; term++) {
        /* Start the shift for the next term into the other buffer before
           consuming the current one. */
        if( term<nterms-1 ) {
          QDP_V_eq_V(tsrc[1-k], x[term+1], QDP_all);
          QDP_V_eq_sV(vec_tmp[1-k], tsrc[1-k], fnshift(backdir), fndir(backdir), QDP_all);
        }
        QDP_M_eq_V_times_Va(tmat, tsrc[k], vec_tmp[k], QDP_all);
        nflops += 54;
        QDP_discard_V(vec_tmp[k]);
        QDP_M_peq_r_times_M(oprod, &residues[term], tmat, QDP_all);
        nflops += 36;

        k = 1-k; /* swap 0 and 1 */
      } /* end loop over terms in rational function expansion */

      link_gather_connection_qdp(gathered, oprod, tmat, gatherdir);

      /* Unit weight; for the Naik part fermion_eps is applied outside this
         routine, in the "wrapper" routine. */
      REAL coeff = 1;
      QDP_M_peq_r_times_M(accum[dir], &coeff, gathered, QDP_all);
      nflops += 36;
    } /* end of loop on directions */
  }

  QDP_destroy_V( tsrc[0] );
  QDP_destroy_V( tsrc[1] );
  QDP_destroy_V( vec_tmp[0] );
  QDP_destroy_V( vec_tmp[1] );
  QDP_destroy_M( tmat );
  QDP_destroy_M( oprod );
  QDP_destroy_M( gathered );

  info->final_flop = ((double)nflops)*QDP_sites_on_node;
} /* hisq_force_multi_smearing0_fnmat */
void QOP_hisq_force_multi_wrapper_fnmat(QOP_info_t *info, QOP_FermionLinksHisq *flh, QOP_Force *Force, QOP_hisq_coeffs_t *hisq_coeff, REAL *residues, QDP_ColorVector *x[], int *n_orders_naik) { double dtime = QDP_time(); int i, ipath, dir; REAL coeff_mult; double *eps_naik = hisq_coeff->eps_naik; int n_naiks = hisq_coeff->n_naiks; QOP_hisq_unitarize_method_t umethod = hisq_coeff->umethod; // Quark paths sorted by net displacement and last directions static Q_path *q_paths_sorted_1 = NULL; static Q_path *q_paths_sorted_2 = NULL; static Q_path *q_paths_sorted_3 = NULL; static int *netbackdir_table_1 = NULL; static int *netbackdir_table_2 = NULL; static int *netbackdir_table_3 = NULL; static int first_force = 1; if(first_force == 1) QOP_make_paths_and_dirs_hisq(hisq_coeff, umethod); int num_q_paths_1 = qop_get_num_q_paths_1(); int num_q_paths_2 = qop_get_num_q_paths_2(); int num_q_paths_3 = qop_get_num_q_paths_3(); Q_path *q_paths_1 = qop_get_q_paths_1(); Q_path *q_paths_2 = qop_get_q_paths_2(); Q_path *q_paths_3 = qop_get_q_paths_3(); Q_path *q_paths_sorted_current = NULL; int *netbackdir_table_current = NULL; int inaik; int n_naik_shift; double final_flop = 0.; size_t nflops = 0; QDP_ColorMatrix * force[4] = {Force->force[0], Force->force[1], Force->force[2], Force->force[3]}; int num_q_paths_current,n_orders_naik_current;//==nterms QDP_ColorMatrix *force_accum_0[4]; QDP_ColorMatrix *force_accum_0_naik[4]; QDP_ColorMatrix *force_accum_1[4]; QDP_ColorMatrix *force_accum_1u[4]; QDP_ColorMatrix *force_accum_2[4]; QDP_ColorMatrix *force_final[4]; QDP_ColorMatrix *Ugf[4], *Vgf[4], *Wgf[4]; int nterms = 0, n_order_naik_total; for(inaik = 0; inaik < n_naiks; inaik++) nterms += n_orders_naik[inaik]; n_order_naik_total = nterms; for(i=0;i<4;i++) { Ugf[i] = flh->U_links[i]; Vgf[i] = flh->V_links[i]; Wgf[i] = flh->W_unitlinks[i]; } QDP_ColorMatrix *tmat; QDP_ColorMatrix *mat_tmp0; REAL treal; if( first_force==1 ){ if( q_paths_sorted_1==NULL ) q_paths_sorted_1 = (Q_path 
*)malloc( num_q_paths_1*sizeof(Q_path) ); if(netbackdir_table_1==NULL ) netbackdir_table_1 = (int *)malloc( num_q_paths_1*sizeof(int) ); if( q_paths_sorted_2==NULL ) q_paths_sorted_2 = (Q_path *)malloc( num_q_paths_2*sizeof(Q_path) ); if(netbackdir_table_2==NULL ) netbackdir_table_2 = (int *)malloc( num_q_paths_2*sizeof(int) ); if( q_paths_sorted_3==NULL ) q_paths_sorted_3 = (Q_path *)malloc( num_q_paths_3*sizeof(Q_path) ); if(netbackdir_table_3==NULL ) netbackdir_table_3 = (int *)malloc( num_q_paths_3*sizeof(int) ); else{QOP_printf0("WARNING: remaking sorted path tables\n"); exit(0); } // make sorted tables sort_quark_paths_hisq( q_paths_1, q_paths_sorted_1, num_q_paths_1, 8 ); for( ipath=0; ipath<num_q_paths_1; ipath++ ) netbackdir_table_1[ipath] = find_backwards_gather( &(q_paths_sorted_1[ipath]) ); sort_quark_paths_hisq( q_paths_2, q_paths_sorted_2, num_q_paths_2, 16 ); for( ipath=0; ipath<num_q_paths_2; ipath++ ) netbackdir_table_2[ipath] = find_backwards_gather( &(q_paths_sorted_2[ipath]) ); sort_quark_paths_hisq( q_paths_3, q_paths_sorted_3, num_q_paths_3, 16 ); for( ipath=0; ipath<num_q_paths_3; ipath++ ) netbackdir_table_3[ipath] = find_backwards_gather( &(q_paths_sorted_3[ipath]) ); first_force=0; } tmat = QDP_create_M(); mat_tmp0 = QDP_create_M(); for(i=XUP;i<=TUP;i++){ force_accum_0[i] = QDP_create_M(); force_accum_0_naik[i] = QDP_create_M(); force_accum_1[i] = QDP_create_M(); force_accum_1u[i] = QDP_create_M(); force_accum_2[i] = QDP_create_M(); force_final[i] = QDP_create_M(); } for(dir=XUP;dir<=TUP;dir++) QDP_M_eq_zero(force_accum_2[dir], QDP_all); // loop on different naik masses n_naik_shift = 0; for( inaik=0; inaik<n_naiks; inaik++ ) { // smearing level 0 if( 0==inaik ) { n_orders_naik_current = n_order_naik_total; } else { n_orders_naik_current = n_orders_naik[inaik]; } QOP_hisq_force_multi_smearing0_fnmat(info,residues+n_naik_shift, x+n_naik_shift, n_orders_naik_current, force_accum_0, force_accum_0_naik); final_flop += info->final_flop; // 
smearing level 2 if( 0==inaik ) { q_paths_sorted_current = q_paths_sorted_2; num_q_paths_current = num_q_paths_2; netbackdir_table_current = netbackdir_table_2; } else { q_paths_sorted_current = q_paths_sorted_3; num_q_paths_current = num_q_paths_3; netbackdir_table_current = netbackdir_table_3; } QOP_hisq_force_multi_smearing_fnmat( info,Wgf,residues+n_naik_shift, x+n_naik_shift, n_orders_naik_current, force_accum_1, force_accum_0, force_accum_0_naik, num_q_paths_current, q_paths_sorted_current, netbackdir_table_current ); //QOP_printf0("HISQ smear0 flops = %g\n", info->final_flop); final_flop += info->final_flop; if( 0==inaik ) { coeff_mult = 1.0; } else { coeff_mult = eps_naik[inaik]; } for(dir=XUP;dir<=TUP;dir++) { QDP_M_peq_r_times_M(force_accum_2[dir],&coeff_mult, force_accum_1[dir],QDP_all); nflops += 36; } n_naik_shift += n_orders_naik[inaik]; } if ( umethod==QOP_UNITARIZE_NONE ){ // smearing level 1 QOP_hisq_force_multi_smearing_fnmat( info,Ugf,residues, x, nterms, force_accum_1, force_accum_2, NULL, num_q_paths_1, q_paths_sorted_1, netbackdir_table_1 ); final_flop += info->final_flop; } else if ( umethod==QOP_UNITARIZE_RATIONAL ){ // reunitarization QOP_hisq_force_multi_reunit(info,Vgf,force_accum_1u, force_accum_2); //QOP_printf0("reunit flops = %g\n", info->final_flop); final_flop += info->final_flop; // smearing level 1 QOP_hisq_force_multi_smearing_fnmat( info,Ugf,residues, x, nterms, force_accum_1, force_accum_1u, NULL, num_q_paths_1, q_paths_sorted_1, netbackdir_table_1 ); //QOP_printf0("HISQ smear1 flops = %g\n", info->final_flop); final_flop += info->final_flop; } else { QOP_printf0("Unknown or unsupported unitarization method\n"); exit(1); } // contraction with the link in question should be done here, // after contributions from all levels of smearing are taken into account for(dir=XUP;dir<=TUP;dir++){ QDP_M_eq_M_times_M(force_final[dir],Ugf[dir],force_accum_1[dir],QDP_all); nflops += 198; } // take into account even/odd parity (it is NOT done 
in "smearing" routine) //eps multiplication done outside QOP for(dir=XUP;dir<=TUP;dir++){ QDP_M_eq_M(tmat,force_final[dir],QDP_all); treal = 2.0; QDP_M_eq_r_times_M(force_final[dir],&treal,tmat,QDP_even); treal = -2.0; QDP_M_eq_r_times_M(force_final[dir],&treal,tmat,QDP_odd); nflops += 18; } // Put antihermitian traceless part into momentum // add force to momentum for(dir=XUP; dir<=TUP; dir++){ QDP_M_eq_antiherm_M(mat_tmp0, force_final[dir], QDP_all); QDP_M_peq_M(force[dir], mat_tmp0, QDP_all); nflops += 24+18; //QDP_M_peq_M(force_final[dir], force[dir], QDP_all); //QDP_M_eq_antiherm_M(force[dir], force_final[dir], QDP_all); } for(i=XUP;i<=TUP;i++){ QDP_destroy_M( force_accum_0[i] ); QDP_destroy_M( force_accum_0_naik[i] ); QDP_destroy_M( force_accum_1[i] ); QDP_destroy_M( force_accum_1u[i] ); QDP_destroy_M( force_accum_2[i] ); QDP_destroy_M( force_final[i] ); } QDP_destroy_M( tmat ); QDP_destroy_M( mat_tmp0 ); final_flop += ((double)nflops)*QDP_sites_on_node; info->final_sec = QDP_time() - dtime; info->final_flop = final_flop; info->status = QOP_SUCCESS; //QOP_printf0("HISQ force flops = %g\n", info->final_flop); } //hisq_force_multi_wrapper_fnmat
/* Smearing level i*/ static void QOP_hisq_force_multi_smearing_fnmat(QOP_info_t *info, QDP_ColorMatrix * gf[4], REAL *residues, QDP_ColorVector *x[], int nterms, QDP_ColorMatrix *force_accum[4], QDP_ColorMatrix *force_accum_old[4], QDP_ColorMatrix *force_accum_naik_old[4], int internal_num_q_paths, Q_path *internal_q_paths_sorted, int *internal_netbackdir_table) { int i,j,k,lastdir=-99,ipath,ilink; int length,dir,odir; REAL coeff; QDP_ColorMatrix *tmat; QDP_ColorMatrix *oprod_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mats_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mat_tmp0,*mat_tmp1, *stmp[8];; QDP_ColorVector *vec_tmp[2]; int netbackdir; size_t nflops = 0; // table of net path displacements (backwards from usual convention) Q_path *this_path; // pointer to current path /* Allocate fields */ for(i=0;i<=MAX_PATH_LENGTH;i++){ oprod_along_path[i] = QDP_create_M(); } for(i=1;i<=MAX_PATH_LENGTH;i++){ // 0 element is never used (it's unit matrix) mats_along_path[i] = QDP_create_M(); } mat_tmp0 = QDP_create_M(); mat_tmp1 = QDP_create_M(); for(i=0; i<8; i++) stmp[i] = QDP_create_M(); tmat = QDP_create_M(); vec_tmp[0] = QDP_create_V(); vec_tmp[1] = QDP_create_V(); // clear force accumulators for(dir=XUP;dir<=TUP;dir++) QDP_M_eq_zero(force_accum[dir], QDP_all); // loop over paths, and loop over links in path for( ipath=0; ipath<internal_num_q_paths; ipath++ ){ this_path = &(internal_q_paths_sorted[ipath]); if(this_path->forwback== -1)continue; // skip backwards dslash length = this_path->length; netbackdir = internal_netbackdir_table[ipath]; // move f(i-1) force from current site in positive direction, // this corresponds to outer product |X><Y| calculated at the endpoint of the path if( netbackdir<8) { // Not a Naik path link_gather_connection_qdp(oprod_along_path[0] , force_accum_old[OPP_DIR(netbackdir)], tmat, netbackdir ); } else { // Naik path if( NULL==force_accum_naik_old ) { QOP_printf0( "hisq_force_multi_smearing_fnmat: mismatch:\n" ); QOP_printf0( 
"force_accum_naik_old is NULL, but path table contains Naik paths(!)\n" ); exit(0); } // CONVERSION FROM 3-LINK DIRECTION TO 1-LINK DIRECTION link_gather_connection_qdp(oprod_along_path[0] , force_accum_naik_old[OPP_DIR(netbackdir-8)], tmat, netbackdir ); } // figure out how much of the outer products along the path must be // recomputed. j is last one needing recomputation. k is first one. j=length-1; // default is recompute all if( GOES_BACKWARDS(this_path->dir[0]) ) k=1; else k=0; for(ilink=j;ilink>=k;ilink--){ link_transport_connection_qdp( oprod_along_path[length-ilink], oprod_along_path[length-ilink-1], gf, mat_tmp0, stmp, this_path->dir[ilink] ); nflops += 198; } // maintain an array of transports "to this point" along the path. // Don't recompute beginning parts of path if same as last path ilink=0; // first link where new transport is needed // Sometimes we don't need the matrix for the last link if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length; for( ; ilink<k; ilink++ ){ if( ilink==0 ){ dir = this_path->dir[0]; if( GOES_FORWARDS(dir) ){ QDP_M_eq_sM(tmat, gf[dir], QDP_neighbor[dir], QDP_backward, QDP_all); QDP_M_eq_Ma(mats_along_path[1], tmat, QDP_all); QDP_discard_M(tmat); } else{ QDP_M_eq_M(mats_along_path[1], gf[OPP_DIR(dir)], QDP_all); } } else { // ilink != 0 dir = OPP_DIR(this_path->dir[ilink]); link_transport_connection_qdp( mats_along_path[ilink+1], mats_along_path[ilink], gf, mat_tmp0, stmp, dir ); nflops += 198; } } // end loop over links // A path has (length+1) points, counting the ends. At first // point, no "down" direction links have their momenta "at this // point". At last, no "up" ... 
if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length; for( ilink=0; ilink<=k; ilink++ ){ if(ilink<length)dir = this_path->dir[ilink]; else dir=NODIR; coeff = this_path->coeff; if( (ilink%2)==1 )coeff = -coeff; // add in contribution to the force if( ilink<length && GOES_FORWARDS(dir) ){ link_gather_connection_qdp(mat_tmp1, oprod_along_path[length-ilink-1], tmat, dir ); if(ilink==0) { QDP_M_eq_M(mat_tmp0,mat_tmp1,QDP_all); } else { QDP_M_eq_M_times_Ma(mat_tmp0, mats_along_path[ilink], mat_tmp1, QDP_all); nflops += 198; QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all); } QDP_M_peq_r_times_M(force_accum[dir],&coeff,mat_tmp1,QDP_all); nflops += 36; } if( ilink>0 && GOES_BACKWARDS(lastdir) ){ odir = OPP_DIR(lastdir); if( ilink==1 ){ QDP_M_eq_M(mat_tmp0,oprod_along_path[length-ilink],QDP_all); QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all); } else{ link_gather_connection_qdp(mat_tmp1, mats_along_path[ilink-1], tmat, odir ); QDP_M_eq_M_times_Ma(mat_tmp0, oprod_along_path[length-ilink], mat_tmp1, QDP_all); nflops += 198; QDP_M_eq_Ma(mat_tmp1, mat_tmp0, QDP_all); } QDP_M_peq_r_times_M(force_accum[odir],&coeff,mat_tmp1,QDP_all); nflops += 36; } lastdir = dir; } // end loop over links in path // } // end loop over paths // QDP_destroy_V( vec_tmp[0] ); QDP_destroy_V( vec_tmp[1] ); QDP_destroy_M( mat_tmp0 ); QDP_destroy_M( mat_tmp1 ); QDP_destroy_M( tmat ); for(i=0; i<8; i++) QDP_destroy_M(stmp[i]); for(i=0;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( oprod_along_path[i] ); } for(i=1;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( mats_along_path[i] ); } info->final_flop = ((double)nflops)*QDP_sites_on_node; return; }//hisq_force_multi_smearing_fnmat
void QOP_hisq_deriv_multi_fnmat2_qdp(QOP_info_t *info, QOP_FermionLinksHisq *flh, QDP_ColorMatrix *deriv[], QOP_hisq_coeffs_t *hisq_coeff, REAL *residues, QDP_ColorVector *x[], int *n_orders_naik) { #define NC QDP_get_nc(deriv[0]) if(!QOP_asqtad.inited) QOP_asqtad_invert_init(); double dtime = QDP_time(); double totalflops = 0; int siteflops = 0; QOP_info_t tinfo; QDP_ColorMatrix *Ugf[4], *Vgf[4], *Wgf[4]; for(int i=0; i<4; i++) { Ugf[i] = flh->U_links[i]; Vgf[i] = flh->V_links[i]; Wgf[i] = flh->W_unitlinks[i]; } QDP_ColorMatrix *force_accum_0[4]; QDP_ColorMatrix *force_accum_0_naik[4]; QDP_ColorMatrix *force_accum_1[4]; QDP_ColorMatrix *force_accum_1u[4]; QDP_ColorMatrix *force_accum_2[4]; QDP_ColorMatrix *force_final[4]; QDP_ColorMatrix *tmat = QDP_create_M(); for(int i=0; i<4; i++) { force_accum_0[i] = QDP_create_M(); force_accum_0_naik[i] = QDP_create_M(); force_accum_1[i] = QDP_create_M(); force_accum_1u[i] = QDP_create_M(); force_accum_2[i] = QDP_create_M(); force_final[i] = QDP_create_M(); QDP_M_eq_zero(force_accum_2[i], QDP_all); } int n_naiks = hisq_coeff->n_naiks; int nterms = 0; for(int inaik = 0; inaik < n_naiks; inaik++) nterms += n_orders_naik[inaik]; // loop on different naik masses int n_naik_shift = 0; for(int inaik=0; inaik<n_naiks; inaik++) { int n_orders_naik_current; if( inaik==0 ) { n_orders_naik_current = nterms; } else { n_orders_naik_current = n_orders_naik[inaik]; } QOP_get_mid(&tinfo, force_accum_0, QDP_neighbor, 4, residues+n_naik_shift, 1, x+n_naik_shift, n_orders_naik_current); totalflops += tinfo.final_flop; QOP_get_mid(&tinfo, force_accum_0_naik, QOP_common.neighbor3, 4, residues+n_naik_shift, 1, x+n_naik_shift, n_orders_naik_current); totalflops += tinfo.final_flop; // compensate for -1 on odd sites here instead of at end for(int dir=0; dir<4; dir++) { QDP_M_eqm_M(force_accum_0[dir], force_accum_0[dir], QDP_odd); QDP_M_eqm_M(force_accum_0_naik[dir], force_accum_0_naik[dir], QDP_odd); } // smearing level 0 for(int i=0; i<4; i++) 
QDP_M_eq_zero(force_accum_1[i], QDP_all); if(inaik==0) { QOP_asqtad_coeffs_t acoef; acoef.one_link = hisq_coeff->asqtad_one_link; acoef.three_staple = hisq_coeff->asqtad_three_staple; acoef.five_staple = hisq_coeff->asqtad_five_staple; acoef.seven_staple = hisq_coeff->asqtad_seven_staple; acoef.lepage = hisq_coeff->asqtad_lepage; acoef.naik = hisq_coeff->asqtad_naik; QOP_asqtad_deriv(&tinfo, Wgf, force_accum_1, &acoef, force_accum_0, force_accum_0_naik); //QOP_printf0("HISQ smear0 flops = %g\n", tinfo.final_flop); totalflops += tinfo.final_flop; } else { QOP_asqtad_coeffs_t acoef; acoef.one_link = hisq_coeff->difference_one_link; acoef.three_staple = 0; acoef.five_staple = 0; acoef.seven_staple = 0; acoef.lepage = 0; acoef.naik = hisq_coeff->difference_naik; QOP_asqtad_deriv(&tinfo, Wgf, force_accum_1, &acoef, force_accum_0, force_accum_0_naik); totalflops += tinfo.final_flop; } QLA_Real coeff_mult; if( inaik==0 ) { coeff_mult = 1.0; } else { coeff_mult = hisq_coeff->eps_naik[inaik]; } for(int dir=0; dir<4; dir++) { QDP_M_peq_r_times_M(force_accum_2[dir], &coeff_mult, force_accum_1[dir], QDP_all); } siteflops += 4*36; n_naik_shift += n_orders_naik[inaik]; } // smearing level 1 QOP_asqtad_coeffs_t acoef; acoef.one_link = hisq_coeff->fat7_one_link; acoef.three_staple = hisq_coeff->fat7_three_staple; acoef.five_staple = hisq_coeff->fat7_five_staple; acoef.seven_staple = hisq_coeff->fat7_seven_staple; acoef.lepage = 0; acoef.naik = 0; if(QOP_hisq_links.use_fat7_lepage) { acoef.lepage = hisq_coeff->fat7_lepage; } QOP_hisq_unitarize_method_t umethod = hisq_coeff->umethod; if ( umethod==QOP_UNITARIZE_NONE ){ for(int dir=0; dir<4; dir++) QDP_M_eq_zero(force_accum_1[dir], QDP_all); QOP_asqtad_deriv(&tinfo, Ugf, force_accum_1, &acoef, force_accum_2, NULL); totalflops += tinfo.final_flop; } else if ( umethod==QOP_UNITARIZE_RATIONAL ) { for(int mu=0; mu<4; mu++) QDP_M_eq_Ma(force_accum_1u[mu], force_accum_2[mu], QDP_all); // reunitarization #if QOP_Colors == 3 
QOP_hisq_force_multi_reunit(&tinfo, Vgf, force_accum_2, force_accum_1u); #else for(int mu=0; mu<4; mu++) { QOP_projectU_deriv_qdp(&tinfo, force_accum_2[mu], Wgf[mu], Vgf[mu], force_accum_1u[mu]); } #endif //QOP_printf0("reunit flops = %g\n", tinfo.final_flop); for(int mu=0; mu<4; mu++) QDP_M_eq_Ma(force_accum_1u[mu], force_accum_2[mu], QDP_all); totalflops += tinfo.final_flop; for(int dir=0; dir<4; dir++) QDP_M_eq_zero(force_accum_1[dir], QDP_all); QOP_asqtad_deriv(&tinfo, Ugf, force_accum_1, &acoef, force_accum_1u, NULL); //QOP_printf0("HISQ smear1 flops = %g\n", tinfo.final_flop); totalflops += tinfo.final_flop; } else { QOP_printf0("Unknown or unsupported unitarization method\n"); exit(1); } // take into account even/odd parity (it is NOT done in "smearing" routine) // eps multiplication done outside QOP // extra factor of 2 for(int dir=0; dir<4; dir++) { QLA_Real treal = 2; //QDP_M_peq_r_times_M(deriv[dir], &treal, force_accum_1[dir], QDP_even); //QDP_M_meq_r_times_M(deriv[dir], &treal, force_accum_1[dir], QDP_odd); QDP_M_peq_r_times_M(deriv[dir], &treal, force_accum_1[dir], QDP_all); } siteflops += 4*36; for(int i=0; i<4; i++) { QDP_destroy_M( force_accum_0[i] ); QDP_destroy_M( force_accum_0_naik[i] ); QDP_destroy_M( force_accum_1[i] ); QDP_destroy_M( force_accum_1u[i] ); QDP_destroy_M( force_accum_2[i] ); QDP_destroy_M( force_final[i] ); } QDP_destroy_M( tmat ); totalflops += ((double)siteflops)*QDP_sites_on_node; info->final_sec = QDP_time() - dtime; info->final_flop = totalflops; info->status = QOP_SUCCESS; #undef NC }
/*
 * Add the plaquette (and, when enabled, adjoint-plaquette) staples around
 * link u[mu] to `staple` on `subset`.
 *
 * For every direction nu != mu two three-link products are formed:
 *   upper:  u[nu](x) u[mu](x+nu) u*[nu](x+mu)
 *   lower:  u*[nu](x-nu) u[mu](x-nu) u[nu](x-nu+mu)
 * The lower product is first built on `osubset` and then shifted backward
 * along nu so it lands on `subset`.
 *
 * With coeffs->adjoint_plaquette == 0 the sum of both products is added with
 * weight coeffs->plaquette.  Otherwise each product P is added with the
 * per-site complex weight  plaquette + adjoint_plaquette * (P dot u[mu]),
 * where "dot" is whatever QDP_C_peq_M_dot_M accumulates.
 */
static void
get_staple_plaq(QDP_ColorMatrix *staple, int mu, QDP_ColorMatrix *u[],
                QOP_gauge_coeffs_t *coeffs,
                QDP_Subset subset, QDP_Subset osubset)
{
#define NC QDP_get_nc(staple)
  QDP_Lattice *lat = QDP_get_lattice_M(staple);
  int nd = QDP_ndim_L(lat);
  QDP_Shift *neighbor = QDP_neighbor_L(lat);
  QLA_Real plaq = coeffs->plaquette;
  QLA_Real adpl = coeffs->adjoint_plaquette;
#if 1
  /* Fields that live for the whole call. */
  QDP_ColorMatrix *scratch = QDP_create_M_L(lat);
  QDP_ColorMatrix *lower_src = QDP_create_M_L(lat);
  QDP_ColorMatrix *upper_half = QDP_create_M_L(lat);
  QDP_Complex *weight = NULL;
  if (adpl != 0) {
    weight = QDP_create_C_L(lat);
  }

  for (int nu = 0; nu < nd; nu++) {
    if (nu == mu) continue;

    /* Shift destinations are created and destroyed per direction. */
    QDP_ColorMatrix *unu_at_mu = QDP_create_M_L(lat);  /* u[nu](x+mu) */
    QDP_ColorMatrix *umu_at_nu = QDP_create_M_L(lat);  /* u[mu](x+nu) */
    QDP_ColorMatrix *lower = QDP_create_M_L(lat);      /* lower staple */

    QDP_M_eq_sM(unu_at_mu, u[nu], neighbor[mu], QDP_forward, QDP_all_L(lat));
    QDP_M_eq_Ma_times_M(scratch, u[nu], u[mu], osubset);
    QDP_M_eq_sM(umu_at_nu, u[mu], neighbor[nu], QDP_forward, subset);
    QDP_M_eq_M_times_M(lower_src, scratch, unu_at_mu, osubset);
    QDP_M_eq_sM(lower, lower_src, neighbor[nu], QDP_backward, subset);
    QDP_M_eq_M_times_M(upper_half, u[nu], umu_at_nu, subset);

    if (adpl != 0) {
      QLA_Complex z;
      QLA_c_eq_r(z, plaq/adpl);

      /* Upper staple; `scratch` is free again and is reused to hold it. */
      QDP_C_eq_c(weight, &z, subset);
      QDP_M_eq_M_times_Ma(scratch, upper_half, unu_at_mu, subset);
      QDP_C_peq_M_dot_M(weight, scratch, u[mu], subset);
      QDP_C_eq_r_times_C(weight, &adpl, weight, subset);
      QDP_M_peq_C_times_M(staple, weight, scratch, subset);

      /* Lower staple, same weighting. */
      QDP_C_eq_c(weight, &z, subset);
      QDP_C_peq_M_dot_M(weight, lower, u[mu], subset);
      QDP_C_eq_r_times_C(weight, &adpl, weight, subset);
      QDP_M_peq_C_times_M(staple, weight, lower, subset);
    } else {
      /* Fold the upper staple into the lower one, then add both at once. */
      QDP_M_peq_M_times_Ma(lower, upper_half, unu_at_mu, subset);
      QDP_M_peq_r_times_M(staple, &plaq, lower, subset);
    }

    QDP_destroy_M(unu_at_mu);
    QDP_destroy_M(umu_at_nu);
    QDP_destroy_M(lower);
  }

  if (adpl != 0) {
    QDP_destroy_C(weight);
  }
  QDP_destroy_M(scratch);
  QDP_destroy_M(lower_src);
  QDP_destroy_M(upper_half);
#else
  /* Disabled alternative: build both staples with path_prod(). */
  QDP_ColorMatrix *t = QDP_create_M_L(lat);
  int nu, path[3];
  QDP_Subset subs[2];
  subs[0] = subset;
  subs[1] = osubset;
  for(nu=0; nu<nd; nu++) {
    if (nu == mu) continue;
    path[0] = 1+nu;
    path[1] = -(1+mu);
    path[2] = -(1+nu);
    path_prod(u, t, path, 3, 1, subs, neighsubeo);
    QDP_M_peq_M(staple, t, subset);
    path[0] = -(1+nu);
    path[1] = -(1+mu);
    path[2] = 1+nu;
    path_prod(u, t, path, 3, 1, subs, neighsubeo);
    QDP_M_peq_M(staple, t, subset);
  }
  QDP_destroy_M(t);
#endif
}
/*
 * Add the staples of the improved gauge action around link u[mu] to
 * `staple` on subs[subl].
 *
 * Each contribution is a product of links along a short path, evaluated by
 * path_prod().  Path entries are signed, 1-based directions; 0 is never
 * used.  Which families are included depends on the coefficients:
 *   plaquette / adjoint_plaquette : 3-link paths
 *   rectangle                     : three 5-link paths per direction nu
 *   parallelogram                 : 5-link paths over two directions nu, rho
 *
 * The tm[] and sm[] scratch arrays are filled in here before the first
 * path_prod() call and released again before returning.
 * NOTE(review): tm/sm are declared outside this block -- presumably
 * file-scope scratch used by path_prod(); confirm.
 */
static void
get_staple_imp(QDP_ColorMatrix *staple, int mu, QDP_ColorMatrix **u,
               QOP_gauge_coeffs_t *coeffs, int subl, QDP_Subset subs[],
               int (*neighsub)(int subl, int dir))
{
#define NC QDP_get_nc(staple)
  QDP_Lattice *lattice = QDP_get_lattice_M(staple);
  int ndim = QDP_ndim_L(lattice);
  int nslots = 2*ndim;
  QLA_Real c_plaq = coeffs->plaquette;
  QLA_Real c_rect = coeffs->rectangle;
  QLA_Real c_pgm = coeffs->parallelogram;
  QLA_Real c_adj = coeffs->adjoint_plaquette;

  /* Backing storage for sm[]; only valid for the duration of this call. */
  QDP_ColorMatrix *slots[2][nslots];
  QDP_ColorMatrix *prod = QDP_create_M_L(lattice);
  for (int side = 0; side < 2; side++) {
    tm[side] = QDP_create_M_L(lattice);
    sm[side] = slots[side];
    for (int k = 0; k < nslots; k++) {
      sm[side][k] = QDP_create_M_L(lattice);
    }
  }

  QDP_Complex *weight = NULL;
  if (c_adj != 0) {
    weight = QDP_create_C_L(lattice);
  }

  int mup = 1 + mu;                  /* 1-based, positive mu */
  int bsubl = neighsub(subl, mup);
  QDP_Subset target = subs[subl];

  /* Plaquette and adjoint-plaquette staples. */
  if (c_plaq != 0 || c_adj != 0) {
    for (int nu = -ndim; nu <= ndim; nu++) {
      if (nu == 0 || nu == mup || nu == -mup) continue;

      int path[5] = { nu, -mup, -nu };
      path_prod(u, prod, path, 3, bsubl, subs, neighsub);

      if (c_adj != 0) {
        /* weight = c_plaq + c_adj * (prod dot u[mu]), site by site */
        QLA_Complex z;
        QLA_c_eq_r(z, c_plaq/c_adj);
        QDP_C_eq_c(weight, &z, target);
        QDP_C_peq_M_dot_M(weight, prod, u[mu], target);
        QDP_C_eq_r_times_C(weight, &c_adj, weight, target);
        QDP_M_peq_C_times_M(staple, weight, prod, target);
      } else {
        QDP_M_peq_r_times_M(staple, &c_plaq, prod, target);
      }
    }
  }

  /* Rectangle staples: three paths for every allowed nu. */
  if (c_rect != 0) {
    for (int nu = -ndim; nu <= ndim; nu++) {
      if (nu == 0 || nu == mup || nu == -mup) continue;

      int rect_paths[3][5] = {
        {  nu,   nu, -mup,  -nu, -nu },
        {  nu, -mup, -mup,  -nu, mup },
        { mup,   nu, -mup, -mup, -nu },
      };
      for (int p = 0; p < 3; p++) {
        path_prod(u, prod, rect_paths[p], 5, bsubl, subs, neighsub);
        QDP_M_peq_r_times_M(staple, &c_rect, prod, target);
      }
    }
  }

  /* Parallelogram staples: rho must differ from +-mu and +-nu. */
  if (c_pgm != 0) {
    for (int nu = -ndim; nu <= ndim; nu++) {
      if (nu == 0 || nu == mup || nu == -mup) continue;

      for (int rho = -ndim; rho <= ndim; rho++) {
        if (rho == 0 || rho == mup || rho == -mup) continue;
        if (rho == nu || rho == -nu) continue;

        int path[5] = { nu, rho, -mup, -nu, -rho };
        path_prod(u, prod, path, 5, bsubl, subs, neighsub);
        QDP_M_peq_r_times_M(staple, &c_pgm, prod, target);
      }
    }
  }

  if (c_adj != 0) {
    QDP_destroy_C(weight);
  }
  QDP_destroy_M(prod);
  for (int side = 0; side < 2; side++) {
    for (int k = 0; k < nslots; k++) {
      QDP_destroy_M(sm[side][k]);
    }
    QDP_destroy_M(tm[side]);
  }
}