/*
 * Transport a "connection" matrix field across a single link.
 * Operates on both parities (every call uses QDP_all).
 *
 *   dest  result field
 *   src   field to be transported
 *   gf    gauge links, indexed by forward direction
 *   work  scratch field, overwritten
 *   st    scratch fields used as shift destinations, indexed by dir
 *   dir   link direction; forward/backward is decided by GOES_FORWARDS()
 *
 * Forward:  dest = gf[dir] * (src shifted in from QDP_neighbor[dir], forward)
 * Backward: dest = (gf[opp]^dagger * src) shifted in from
 *           QDP_neighbor[opp], backward, where opp = OPP_DIR(dir)
 */
static void
link_transport_connection_qdp(QDP_ColorMatrix *dest, QDP_ColorMatrix *src,
                              QDP_ColorMatrix *gf[4], QDP_ColorMatrix *work,
                              QDP_ColorMatrix *st[8], int dir)
{
  QDP_ColorMatrix *shifted = st[dir];

  if( !GOES_FORWARDS(dir) ) {
    /* backward link: multiply by the adjoint link first, then shift */
    int opp = OPP_DIR(dir);

    QDP_M_eq_Ma_times_M(work, gf[opp], src, QDP_all);
    QDP_M_eq_sM(shifted, work, QDP_neighbor[opp], QDP_backward, QDP_all);
    QDP_M_eq_M(dest, shifted, QDP_all);
    QDP_discard_M(shifted);
    return;
  }

  /* forward link: shift a copy of the source, then multiply by the link */
  QDP_M_eq_M(work, src, QDP_all);
  QDP_M_eq_sM(shifted, work, QDP_neighbor[dir], QDP_forward, QDP_all);
  QDP_M_eq_M_times_M(dest, gf[dir], shifted, QDP_all);
  QDP_discard_M(shifted);
} /* link_transport_connection_qdp */
/*
 * Build the product of links along a path and store it in m.
 *
 *   u          gauge links, indexed by direction (0-based)
 *   m          destination field; also supplies the lattice
 *   path       steps along the path: +(d+1) moves in the + direction d,
 *              -(d+1) moves in the - direction d
 *   len        number of steps; must be >= 1 (with len < 1 the running
 *              product p is still NULL when it is copied into m)
 *   subl       index of the starting sublattice; updated through neighsubl
 *              after every step
 *   subset     subsets indexed by sublattice number
 *   neighsubl  maps (sublattice, step) to the sublattice reached by the step
 *
 * Uses the temporaries tm[], sm[][] and the helper eosub(), all of which are
 * defined outside this function.
 * NOTE(review): tm is indexed by 0/1 and sm by [0/1][0 .. 2*nd-1] here;
 * confirm they are allocated with at least those extents.
 */
static void
path_prod(QDP_ColorMatrix *u[], QDP_ColorMatrix *m, int path[], int len,
          int subl, QDP_Subset subset[],
          int (*neighsubl)(int subl, int dir))
{
  QDP_ShiftDir fb;
  QDP_ColorMatrix *p=NULL, *s=NULL;
  QDP_Lattice *lat = QDP_get_lattice_M(m);
  int nd = QDP_ndim_L(lat);
  int sn = 0;  /* selects which of the two temporaries is written next */

  for(int i=0; i<len; i++) {
    int dir = abs(path[i])-1;
    // if the path moves in the + dir then we shift from the backward dir
    fb = path[i]<0 ? QDP_forward : QDP_backward;
    if(fb==QDP_backward) { // path is moving in + dir
      if(i==0) {
        QDP_M_eq_Ma(tm[sn], u[dir], subset[subl]);
      } else {
        QDP_M_eq_Ma_times_M(tm[sn], u[dir], p, subset[subl]);
        QDP_discard_M(p);
      }
      subl = neighsubl(subl, path[i]);
      s = sm[sn][nd+dir];
      QDP_discard_M(s);
      QDP_M_eq_sM(s, tm[sn], QDP_neighbor_L(lat)[dir], fb, eosub(subl));
      sn = 1-sn;  /* the shift still reads tm[old sn]; write the other one next */
      p = s;
    } else { // path is moving in - dir
      if(i==0) {
        /* first step needs no shift: the product is just the link itself */
        subl = neighsubl(subl, path[i]);
        QDP_M_eq_M(tm[1-sn], u[dir], subset[subl]);
      } else {
        QDP_M_eq_M(tm[sn], p, subset[subl]);
        QDP_discard_M(p);
        subl = neighsubl(subl, path[i]);
        s = sm[sn][dir];
        QDP_discard_M(s);
        QDP_M_eq_sM(s, tm[sn], QDP_neighbor_L(lat)[dir], fb, eosub(subl));
        QDP_M_eq_M_times_M(tm[1-sn], u[dir], s, subset[subl]);
        QDP_discard_M(s);
      }
      p = tm[1-sn];
    }
  }

  QDP_M_eq_M(m, p, subset[subl]);
  QDP_discard_M(p);
  /* s is still NULL when no shift was ever issued (e.g. a single step in
     the - direction), so only discard it when it was actually assigned */
  if(s!=NULL) QDP_discard_M(s);
}
// topdir = 1..nd // sidedir = -nd..nd // toplinknum,sidelinknum = 0..nin-1 void QOP_staples(QOP_info_t *info, int nout, int nin, QDP_ColorMatrix *out[], QDP_ColorMatrix *in[], int nstaples[], int *topdir[], int *sidedir[], int *toplinknum[], int *sidelinknum[], QLA_Real *coef[]) { #define NC QDP_get_nc(in[0]) double dtime = QOP_time(); double nflops = 0; int nd = QDP_ndim(); QDP_ColorMatrix *ftmps[nin][nd], *t1, *t2, *bt2[nd]; for(int i=0; i<nin; i++) for(int j=0; j<nd; j++) ftmps[i][j] = NULL; for(int i=0; i<nd; i++) bt2[i] = NULL; t1 = QDP_create_M(); t2 = QDP_create_M(); for(int io=0; io<nout; io++) { //QOP_printf0("%i: ns: %i\n", io, nstaples[io]); for(int s=0; s<nstaples[io]; s++) { QLA_Real c = coef[io][s]; int tn = toplinknum[io][s]; int sdir = sidedir[io][s]; //QOP_printf0(" %i: sdir: %i c: %g\n", s, sdir, c); if(sdir==0) { if(c==1) { QDP_M_peq_M(out[io], in[tn], QDP_all); nflops += PEQM; } else { QDP_M_peq_r_times_M(out[io], &c, in[tn], QDP_all); nflops += 2*PEQM; } } else if(sdir>0) { int nu = sdir-1; int mu = topdir[io][s]-1; int sn = sidelinknum[io][s]; QDP_ColorMatrix *Umunu = getU(tn, mu, nu); QDP_ColorMatrix *Unumu = getU(sn, nu, mu); QDP_M_eq_M_times_M(t1, in[sn], Umunu, QDP_all); if(c==1) { QDP_M_peq_M_times_Ma(out[io], t1, Unumu, QDP_all); nflops += EQMTM+PEQMTM; } else { QDP_M_eq_M_times_Ma(t2, t1, Unumu, QDP_all); QDP_M_peq_r_times_M(out[io], &c, t2, QDP_all); nflops += 2*EQMTM+2*PEQM; } } else { int nu = -sdir-1; int mu = topdir[io][s]-1; int sn = sidelinknum[io][s]; QDP_ColorMatrix *Unumu = getU(sn, nu, mu); QDP_M_eq_M_times_M(t1, in[tn], Unumu, QDP_all); QDP_M_eq_Ma_times_M(t2, in[sn], t1, QDP_all); QDP_ColorMatrix *tb = shiftb(t2, nu); if(c==1) { QDP_M_peq_M(out[io], tb, QDP_all); nflops += 2*EQMTM+PEQM; } else { QDP_M_peq_r_times_M(out[io], &c, tb, QDP_all); nflops += 2*EQMTM+2*PEQM; } QDP_discard_M(tb); } } } for(int i=0; i<nin; i++) for(int j=0; j<nd; j++) if(ftmps[i][j]!=NULL) QDP_destroy_M(ftmps[i][j]); for(int i=0; i<nd; i++) 
if(bt2[i]!=NULL) QDP_destroy_M(bt2[i]); QDP_destroy_M(t1); QDP_destroy_M(t2); info->final_sec = QOP_time() - dtime; info->final_flop = nflops*QDP_sites_on_node; info->status = QOP_SUCCESS; #undef NC }
// topdir = 1..nd // sidedir = -nd..nd // toplinknum,sidelinknum = 0..nin-1 void QOP_staples_deriv(QOP_info_t *info, int nout, int nin, QDP_ColorMatrix *deriv[], QDP_ColorMatrix *chain[], QDP_ColorMatrix *in[], int nstaples[], int *topdir[], int *sidedir[], int *toplinknum[], int *sidelinknum[], QLA_Real *coef[]) { #define NC QDP_get_nc(in[0]) double dtime = QOP_time(); double nflops = 0; int nd = QDP_ndim(); QDP_ColorMatrix *ftmps[nin][nd], *t1, *t2, *t3, *t4, *tc, *bt2[nd], *bt3[nd], *ctmps[nd]; int ctn[nd]; for(int i=0; i<nin; i++) for(int j=0; j<nd; j++) ftmps[i][j] = NULL; for(int i=0; i<nd; i++) bt2[i] = bt3[i] = ctmps[i] = NULL; t1 = QDP_create_M(); t2 = QDP_create_M(); t3 = QDP_create_M(); t4 = QDP_create_M(); tc = QDP_create_M(); // process in reverse in case calculated staples used as input for others for(int io=nout-1; io>=0; io--) { for(int i=0; i<nd; i++) { if(ctmps[i]) QDP_discard_M(ctmps[i]); ctn[i] = 0; } QDP_M_eq_M(tc, chain[io], QDP_all); for(int s=0; s<nstaples[io]; s++) { QLA_Real c = coef[io][s]; int tn = toplinknum[io][s]; int sdir = sidedir[io][s]; //QOP_printf0("io: %i s: %i sdir: %i tn: %i c: %g\n", io, s, sdir, tn, c); if(sdir==0) { if(c==1) { QDP_M_peq_M(deriv[tn], tc, QDP_all); nflops += PEQM; } else { QDP_M_peq_r_times_M(deriv[tn], &c, tc, QDP_all); nflops += 2*PEQM; } } else if(sdir>0) { int nu = sdir-1; int mu = topdir[io][s]-1; int sn = sidelinknum[io][s]; //QOP_printf0(" mu: %i nu: %i sn: %i\n", mu, nu, sn); QDP_ColorMatrix *Umunu = getU(tn, mu, nu); QDP_ColorMatrix *Unumu = getU(sn, nu, mu); QDP_M_eq_M_times_M(t1, in[sn], Umunu, QDP_all); QDP_M_eq_Ma_times_M(t2, tc, t1, QDP_all); QDP_ColorMatrix *tb2 = shiftb(t2, mu); QDP_M_eq_M_times_M(t1, tc, Unumu, QDP_all); QDP_M_eq_Ma_times_M(t3, in[sn], t1, QDP_all); QDP_ColorMatrix *tb3 = shiftb(t3, nu); if(c==1) { QDP_M_peq_M_times_Ma(deriv[sn], t1, Umunu, QDP_all); QDP_M_peq_M(deriv[sn], tb2, QDP_all); QDP_M_peq_M(deriv[tn], tb3, QDP_all); nflops += 4*EQMTM+PEQMTM+2*PEQM; } else { 
QDP_M_eq_M_times_Ma(t4, t1, Umunu, QDP_all); QDP_M_peq_r_times_M(deriv[sn], &c, t4, QDP_all); QDP_M_peq_r_times_M(deriv[sn], &c, tb2, QDP_all); QDP_M_peq_r_times_M(deriv[tn], &c, tb3, QDP_all); nflops += 5*EQMTM+6*PEQM; } QDP_discard_M(tb2); QDP_discard_M(tb3); } else { int nu = -sdir-1; int mu = topdir[io][s]-1; int sn = sidelinknum[io][s]; QDP_ColorMatrix *Cmunu = getC(nu); QDP_ColorMatrix *Unumu = getU(sn, nu, mu); QDP_M_eq_M_times_M(t1, in[sn], Cmunu, QDP_all); QDP_M_eq_Ma_times_M(t2, in[tn], t1, QDP_all); QDP_ColorMatrix *tb2 = shiftb(t2, mu); QDP_M_eq_M_times_M(t3, in[tn], Unumu, QDP_all); if(c==1) { QDP_M_peq_M_times_Ma(deriv[tn], t1, Unumu, QDP_all); QDP_M_peq_M_times_Ma(deriv[sn], t3, Cmunu, QDP_all); QDP_M_peq_M(deriv[sn], tb2, QDP_all); nflops += 3*EQMTM+2*PEQMTM+PEQM; } else { QDP_M_eq_M_times_Ma(t4, t1, Unumu, QDP_all); QDP_M_peq_r_times_M(deriv[tn], &c, t4, QDP_all); QDP_M_eq_M_times_Ma(t4, t3, Cmunu, QDP_all); QDP_M_peq_r_times_M(deriv[sn], &c, t4, QDP_all); QDP_M_peq_r_times_M(deriv[sn], &c, tb2, QDP_all); nflops += 5*EQMTM+6*PEQM; } QDP_discard_M(tb2); } } } for(int i=0; i<nin; i++) for(int j=0; j<nd; j++) if(ftmps[i][j]!=NULL) QDP_destroy_M(ftmps[i][j]); for(int i=0; i<nd; i++) { if(bt2[i]!=NULL) QDP_destroy_M(bt2[i]); if(bt3[i]!=NULL) QDP_destroy_M(bt3[i]); if(ctmps[i]!=NULL) QDP_destroy_M(ctmps[i]); } QDP_destroy_M(t1); QDP_destroy_M(t2); QDP_destroy_M(t3); QDP_destroy_M(t4); QDP_destroy_M(tc); info->final_sec = QOP_time() - dtime; info->final_flop = nflops*QDP_sites_on_node; info->status = QOP_SUCCESS; #undef NC }
/*
 * Allocate and fill the fat links fl[0..3] and long links ll[0..3] from the
 * gauge field gf[0..3], using the path coefficients in coeffs.
 *
 * Fat links: fl[dir] starts as (one_link - 6*lepage) * gf[dir]; the 3-,
 * Lepage, 5- and 7-staple terms are then added by compute_gen_staple(),
 * which receives fl as its last argument.
 * Long links: ll[dir] = naik * gf[dir] * shift(gf[dir] * shift(gf[dir])),
 * with both shifts taken from the forward neighbor in direction dir.
 *
 * Every fl[i] and ll[i] is created here with QDP_create_M(); nothing in this
 * function destroys them.
 */
static void
create_fn_links_qdp(QDP_ColorMatrix *fl[], QDP_ColorMatrix *ll[],
                    QDP_ColorMatrix *gf[], asqtad_path_coeff *coeffs)
{
  QDP_ColorMatrix *staple, *tempmat1;
  QLA_Real one_link, naik;
  double dtimefl, dtimell;
  int i, dir, nu, rho, sig;
#ifdef LLTIME
  double nflopfl = 61632;
  double nflopll = 1804;
#endif

  for(i=0; i<4; i++) {
    fl[i] = QDP_create_M();
    ll[i] = QDP_create_M();
  }
  staple = QDP_create_M();
  tempmat1 = QDP_create_M();

  dtimefl = -dclock();

  /* to fix up the Lepage term, included by a trick below */
  one_link = coeffs->one_link - 6.0*coeffs->lepage;

  /* fat links */
  for(dir=0; dir<4; dir++) {
    QDP_M_eq_r_times_M(fl[dir], &one_link, gf[dir], QDP_all);

    for(nu=0; nu<4; nu++) {
      if(nu==dir) continue;

      compute_gen_staple(staple, dir, nu, gf[dir],
                         (double)coeffs->three_staple, gf, fl);
      compute_gen_staple(NULL, dir, nu, staple, coeffs->lepage, gf, fl);

      for(rho=0; rho<4; rho++) {
        if(rho==dir || rho==nu) continue;

        compute_gen_staple(tempmat1, dir, rho, staple,
                           (double)coeffs->five_staple, gf, fl);

        for(sig=0; sig<4; sig++) {
          if(sig==dir || sig==nu || sig==rho) continue;

          compute_gen_staple(NULL, dir, sig, tempmat1,
                             (double)coeffs->seven_staple, gf, fl);
        } /* sig */
      } /* rho */
    } /* nu */
  } /* dir */

  /* one clock reading closes the fat-link interval and opens the long-link one */
  dtimell = -dclock();
  dtimefl -= dtimell;
#ifdef LLTIME
  node0_printf("LLTIME(Fat): time = %e (Asqtad opt) mflops = %e\n",dtimefl,
               (Real)nflopfl*volume/(1e6*dtimefl*numnodes()) );
#endif

  /* long links */
  naik = coeffs->naik;
  for(dir=0; dir<4; dir++) {
    QDP_M_eq_sM(staple, gf[dir], QDP_neighbor[dir], QDP_forward, QDP_all);
    QDP_M_eq_M_times_M(tempmat1, gf[dir], staple, QDP_all);
    QDP_discard_M(staple);
    QDP_M_eq_sM(staple, tempmat1, QDP_neighbor[dir], QDP_forward, QDP_all);
    QDP_M_eq_M_times_M(ll[dir], gf[dir], staple, QDP_all);
    QDP_M_eq_r_times_M(ll[dir], &naik, ll[dir], QDP_all);
  }

  dtimell += dclock();
#ifdef LLTIME
  node0_printf("LLTIME(long): time = %e (Asqtad opt) mflops = %e\n",dtimell,
               (Real)nflopll*volume/(1e6*dtimell*numnodes()) );
#endif

  QDP_destroy_M(staple);
  QDP_destroy_M(tempmat1);
}
/* Smearing level i*/ static void QOP_hisq_force_multi_smearing_fnmat(QOP_info_t *info, QDP_ColorMatrix * gf[4], REAL *residues, QDP_ColorVector *x[], int nterms, QDP_ColorMatrix *force_accum[4], QDP_ColorMatrix *force_accum_old[4], QDP_ColorMatrix *force_accum_naik_old[4], int internal_num_q_paths, Q_path *internal_q_paths_sorted, int *internal_netbackdir_table) { int i,j,k,lastdir=-99,ipath,ilink; int length,dir,odir; REAL coeff; QDP_ColorMatrix *tmat; QDP_ColorMatrix *oprod_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mats_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mat_tmp0,*mat_tmp1, *stmp[8];; QDP_ColorVector *vec_tmp[2]; int netbackdir; size_t nflops = 0; // table of net path displacements (backwards from usual convention) Q_path *this_path; // pointer to current path /* Allocate fields */ for(i=0;i<=MAX_PATH_LENGTH;i++){ oprod_along_path[i] = QDP_create_M(); } for(i=1;i<=MAX_PATH_LENGTH;i++){ // 0 element is never used (it's unit matrix) mats_along_path[i] = QDP_create_M(); } mat_tmp0 = QDP_create_M(); mat_tmp1 = QDP_create_M(); for(i=0; i<8; i++) stmp[i] = QDP_create_M(); tmat = QDP_create_M(); vec_tmp[0] = QDP_create_V(); vec_tmp[1] = QDP_create_V(); // clear force accumulators for(dir=XUP;dir<=TUP;dir++) QDP_M_eq_zero(force_accum[dir], QDP_all); // loop over paths, and loop over links in path for( ipath=0; ipath<internal_num_q_paths; ipath++ ){ this_path = &(internal_q_paths_sorted[ipath]); if(this_path->forwback== -1)continue; // skip backwards dslash length = this_path->length; netbackdir = internal_netbackdir_table[ipath]; // move f(i-1) force from current site in positive direction, // this corresponds to outer product |X><Y| calculated at the endpoint of the path if( netbackdir<8) { // Not a Naik path link_gather_connection_qdp(oprod_along_path[0] , force_accum_old[OPP_DIR(netbackdir)], tmat, netbackdir ); } else { // Naik path if( NULL==force_accum_naik_old ) { QOP_printf0( "hisq_force_multi_smearing_fnmat: mismatch:\n" ); QOP_printf0( 
"force_accum_naik_old is NULL, but path table contains Naik paths(!)\n" ); exit(0); } // CONVERSION FROM 3-LINK DIRECTION TO 1-LINK DIRECTION link_gather_connection_qdp(oprod_along_path[0] , force_accum_naik_old[OPP_DIR(netbackdir-8)], tmat, netbackdir ); } // figure out how much of the outer products along the path must be // recomputed. j is last one needing recomputation. k is first one. j=length-1; // default is recompute all if( GOES_BACKWARDS(this_path->dir[0]) ) k=1; else k=0; for(ilink=j;ilink>=k;ilink--){ link_transport_connection_qdp( oprod_along_path[length-ilink], oprod_along_path[length-ilink-1], gf, mat_tmp0, stmp, this_path->dir[ilink] ); nflops += 198; } // maintain an array of transports "to this point" along the path. // Don't recompute beginning parts of path if same as last path ilink=0; // first link where new transport is needed // Sometimes we don't need the matrix for the last link if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length; for( ; ilink<k; ilink++ ){ if( ilink==0 ){ dir = this_path->dir[0]; if( GOES_FORWARDS(dir) ){ QDP_M_eq_sM(tmat, gf[dir], QDP_neighbor[dir], QDP_backward, QDP_all); QDP_M_eq_Ma(mats_along_path[1], tmat, QDP_all); QDP_discard_M(tmat); } else{ QDP_M_eq_M(mats_along_path[1], gf[OPP_DIR(dir)], QDP_all); } } else { // ilink != 0 dir = OPP_DIR(this_path->dir[ilink]); link_transport_connection_qdp( mats_along_path[ilink+1], mats_along_path[ilink], gf, mat_tmp0, stmp, dir ); nflops += 198; } } // end loop over links // A path has (length+1) points, counting the ends. At first // point, no "down" direction links have their momenta "at this // point". At last, no "up" ... 
if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length; for( ilink=0; ilink<=k; ilink++ ){ if(ilink<length)dir = this_path->dir[ilink]; else dir=NODIR; coeff = this_path->coeff; if( (ilink%2)==1 )coeff = -coeff; // add in contribution to the force if( ilink<length && GOES_FORWARDS(dir) ){ link_gather_connection_qdp(mat_tmp1, oprod_along_path[length-ilink-1], tmat, dir ); if(ilink==0) { QDP_M_eq_M(mat_tmp0,mat_tmp1,QDP_all); } else { QDP_M_eq_M_times_Ma(mat_tmp0, mats_along_path[ilink], mat_tmp1, QDP_all); nflops += 198; QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all); } QDP_M_peq_r_times_M(force_accum[dir],&coeff,mat_tmp1,QDP_all); nflops += 36; } if( ilink>0 && GOES_BACKWARDS(lastdir) ){ odir = OPP_DIR(lastdir); if( ilink==1 ){ QDP_M_eq_M(mat_tmp0,oprod_along_path[length-ilink],QDP_all); QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all); } else{ link_gather_connection_qdp(mat_tmp1, mats_along_path[ilink-1], tmat, odir ); QDP_M_eq_M_times_Ma(mat_tmp0, oprod_along_path[length-ilink], mat_tmp1, QDP_all); nflops += 198; QDP_M_eq_Ma(mat_tmp1, mat_tmp0, QDP_all); } QDP_M_peq_r_times_M(force_accum[odir],&coeff,mat_tmp1,QDP_all); nflops += 36; } lastdir = dir; } // end loop over links in path // } // end loop over paths // QDP_destroy_V( vec_tmp[0] ); QDP_destroy_V( vec_tmp[1] ); QDP_destroy_M( mat_tmp0 ); QDP_destroy_M( mat_tmp1 ); QDP_destroy_M( tmat ); for(i=0; i<8; i++) QDP_destroy_M(stmp[i]); for(i=0;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( oprod_along_path[i] ); } for(i=1;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( mats_along_path[i] ); } info->final_flop = ((double)nflops)*QDP_sites_on_node; return; }//hisq_force_multi_smearing_fnmat