コード例 #1
0
/*
 * Transport a "connection" matrix field across a single link, acting on
 * all sites (both parities).
 *
 *   dest - receives the transported field
 *   src  - field to be transported
 *   gf   - gauge links; gf[dir] is used for a forward link and
 *          gf[OPP_DIR(dir)] for a backward link
 *   work - scratch field, overwritten
 *   st   - shift temporaries; only st[dir] is used, and it is discarded
 *          again before returning
 *   dir  - link direction (forward or backward)
 */
static void 
link_transport_connection_qdp( QDP_ColorMatrix *dest, QDP_ColorMatrix *src,
                               QDP_ColorMatrix *gf[4], QDP_ColorMatrix *work,
                               QDP_ColorMatrix *st[8], int dir ){
  QDP_ColorMatrix *shifted = st[dir];

  if( !GOES_FORWARDS(dir) ) {
    /* Backward link: apply the adjoint link locally, then shift the
       product in from the backward neighbor. */
    int fdir = OPP_DIR(dir);

    QDP_M_eq_Ma_times_M(work, gf[fdir], src, QDP_all);
    QDP_M_eq_sM(shifted, work, QDP_neighbor[fdir], QDP_backward, QDP_all);
    QDP_M_eq_M(dest, shifted, QDP_all);
    QDP_discard_M(shifted);
    return;
  }

  /* Forward link: shift a copy of src in from the forward neighbor, then
     multiply by the local link. */
  QDP_M_eq_M(work, src, QDP_all);
  QDP_M_eq_sM(shifted, work, QDP_neighbor[dir], QDP_forward, QDP_all);
  QDP_M_eq_M_times_M(dest, gf[dir], shifted, QDP_all);
  QDP_discard_M(shifted);
} /* link_transport_connection_qdp */
コード例 #2
0
/*
 * Build the product of links along a path and store it in m.
 *
 *   u         - link fields, indexed by direction
 *   m         - destination; written on subset[subl] for the final subl
 *   path      - path steps; step i uses direction abs(path[i])-1, a
 *               positive entry moves in the + direction and a negative
 *               entry in the - direction
 *   len       - number of steps in path
 *   subl      - starting sublattice index; updated via neighsubl() at
 *               every step
 *   subset    - subsets indexed by sublattice
 *   neighsubl - maps (sublattice, path step) to the neighboring sublattice
 *
 * Uses the temporaries tm[] and sm[][] and the helper eosub(), all of which
 * are defined outside this function.
 *
 * NOTE(review): with len==0 no step runs, p stays NULL and the final copy
 * into m would receive a NULL source -- callers presumably always pass
 * len>=1; confirm.
 */
static void
path_prod(QDP_ColorMatrix *u[], QDP_ColorMatrix *m, int path[], int len,
	  int subl, QDP_Subset subset[], int (*neighsubl)(int subl, int dir))
{
  QDP_ShiftDir fb;
  QDP_ColorMatrix *p=NULL, *s=NULL;
  QDP_Lattice *lat = QDP_get_lattice_M(m);
  int nd = QDP_ndim_L(lat);

  int sn = 0;  // index of the tm[] buffer currently used as shift source
  for(int i=0; i<len; i++) {
    int dir = abs(path[i])-1;
    // if the path moves in the + dir then we shift from the backward dir
    fb = path[i]<0 ? QDP_forward : QDP_backward;
    if(fb==QDP_backward) { // path is moving in + dir
      // multiply by the adjoint link on the current sublattice, then shift
      if(i==0) {
	QDP_M_eq_Ma(tm[sn], u[dir], subset[subl]);
      } else {
	QDP_M_eq_Ma_times_M(tm[sn], u[dir], p, subset[subl]);
	QDP_discard_M(p);
      }
      subl = neighsubl(subl, path[i]);
      s = sm[sn][nd+dir];
      QDP_discard_M(s);
      QDP_M_eq_sM(s, tm[sn], QDP_neighbor_L(lat)[dir], fb, eosub(subl));
      //p = t1; t1 = t2; t2 = p;
      sn = 1-sn;  // alternate buffers so the shift source is not overwritten
      p = s;
    } else { // path is moving in - dir
      if(i==0) {
	// first step: the product is just the link itself, no shift needed
	subl = neighsubl(subl, path[i]);
 	QDP_M_eq_M(tm[1-sn], u[dir], subset[subl]);
      } else {
	// shift the running product first, then multiply by the link
	QDP_M_eq_M(tm[sn], p, subset[subl]);
	QDP_discard_M(p);
	subl = neighsubl(subl, path[i]);
	s = sm[sn][dir];
	QDP_discard_M(s);
	QDP_M_eq_sM(s, tm[sn], QDP_neighbor_L(lat)[dir], fb, eosub(subl));
	QDP_M_eq_M_times_M(tm[1-sn], u[dir], s, subset[subl]);
	QDP_discard_M(s);
      }
      p = tm[1-sn];
    }
  }
  QDP_M_eq_M(m, p, subset[subl]);
  QDP_discard_M(p);
  // s is still NULL when no shift was performed (e.g. a single step in the
  // - direction); do not hand a NULL field to QDP in that case.
  if(s!=NULL) QDP_discard_M(s);
}
コード例 #3
0
ファイル: staples_p.c プロジェクト: usqcd-software/qopqdp
// Accumulate weighted staple-type products of the input fields into out[].
//
// For every output io (0..nout-1) and every term s (0..nstaples[io]-1) one
// contribution, scaled by coef[io][s], is added to out[io].  The form of the
// contribution is selected by sidedir[io][s]:
//   == 0 : in[tn]
//    > 0 : in[sn] * getU(tn,mu,nu) * adjoint(getU(sn,nu,mu))
//    < 0 : shiftb( adjoint(in[sn]) * in[tn] * getU(sn,nu,mu), nu )
// where tn = toplinknum[io][s], sn = sidelinknum[io][s],
// mu = topdir[io][s]-1 and nu = |sidedir[io][s]|-1.
//
// out[io] is only ever accumulated into (+=); it is not cleared here.
// getU() and shiftb() are defined outside this function; presumably they
// return neighbor-shifted fields cached in ftmps[][] / bt2[] -- TODO confirm.
// Elapsed time, flop count and status are reported through info.
//
// topdir = 1..nd
// sidedir = -nd..nd
// toplinknum,sidelinknum = 0..nin-1
void
QOP_staples(QOP_info_t *info, int nout, int nin,
	    QDP_ColorMatrix *out[], QDP_ColorMatrix *in[],
	    int nstaples[], int *topdir[], int *sidedir[],
	    int *toplinknum[], int *sidelinknum[], QLA_Real *coef[])
{
// NC is not referenced directly in this body; presumably it is consumed by
// the flop-count / QDP macros used below -- TODO confirm.
#define NC QDP_get_nc(in[0])
  double dtime = QOP_time();
  double nflops = 0;
  int nd = QDP_ndim();
  // ftmps/bt2 start out NULL and are destroyed at the end only if non-NULL;
  // they are not assigned directly in this body (presumably filled lazily by
  // getU()/shiftb()).
  QDP_ColorMatrix *ftmps[nin][nd], *t1, *t2, *bt2[nd];
  for(int i=0; i<nin; i++)
    for(int j=0; j<nd; j++)
      ftmps[i][j] = NULL;
  for(int i=0; i<nd; i++) bt2[i] = NULL;
  t1 = QDP_create_M();
  t2 = QDP_create_M();

  for(int io=0; io<nout; io++) {
    //QOP_printf0("%i: ns: %i\n", io, nstaples[io]);
    for(int s=0; s<nstaples[io]; s++) {
      QLA_Real c = coef[io][s];
      int tn = toplinknum[io][s];
      int sdir = sidedir[io][s];
      //QOP_printf0(" %i:  sdir: %i  c: %g\n", s, sdir, c);
      if(sdir==0) {
	// plain link term: out += c * in[tn] (scaling skipped when c==1)
	if(c==1) {
	  QDP_M_peq_M(out[io], in[tn], QDP_all);
	  nflops += PEQM;
	} else {
	  QDP_M_peq_r_times_M(out[io], &c, in[tn], QDP_all);
	  nflops += 2*PEQM;
	}
      } else if(sdir>0) {
	// positive side direction: out += c * in[sn] * Umunu * adj(Unumu)
	int nu = sdir-1;
	int mu = topdir[io][s]-1;
	int sn = sidelinknum[io][s];
	QDP_ColorMatrix *Umunu = getU(tn, mu, nu);
	QDP_ColorMatrix *Unumu = getU(sn, nu, mu);
	QDP_M_eq_M_times_M(t1, in[sn], Umunu, QDP_all);
	if(c==1) {
	  QDP_M_peq_M_times_Ma(out[io], t1, Unumu, QDP_all);
	  nflops += EQMTM+PEQMTM;
	} else {
	  // form the product in t2 first so it can be scaled by c
	  QDP_M_eq_M_times_Ma(t2, t1, Unumu, QDP_all);
	  QDP_M_peq_r_times_M(out[io], &c, t2, QDP_all);
	  nflops += 2*EQMTM+2*PEQM;
	}
      } else {
	// negative side direction: the product adj(in[sn]) * in[tn] * Unumu is
	// formed locally and then passed through shiftb() in direction nu
	int nu = -sdir-1;
	int mu = topdir[io][s]-1;
	int sn = sidelinknum[io][s];
	QDP_ColorMatrix *Unumu = getU(sn, nu, mu);
	QDP_M_eq_M_times_M(t1, in[tn], Unumu, QDP_all);
	QDP_M_eq_Ma_times_M(t2, in[sn], t1, QDP_all);
	QDP_ColorMatrix *tb = shiftb(t2, nu);
	if(c==1) {
	  QDP_M_peq_M(out[io], tb, QDP_all);
	  nflops += 2*EQMTM+PEQM;
	} else {
	  QDP_M_peq_r_times_M(out[io], &c, tb, QDP_all);
	  nflops += 2*EQMTM+2*PEQM;
	}
	// release the shifted field so t2 can be reused by the next term
	QDP_discard_M(tb);
      }
    }
  }

  // free whichever cached temporaries were actually created
  for(int i=0; i<nin; i++)
    for(int j=0; j<nd; j++)
      if(ftmps[i][j]!=NULL) QDP_destroy_M(ftmps[i][j]);
  for(int i=0; i<nd; i++) if(bt2[i]!=NULL) QDP_destroy_M(bt2[i]);
  QDP_destroy_M(t1);
  QDP_destroy_M(t2);
  info->final_sec = QOP_time() - dtime;
  // nflops is counted per site; scale by the number of sites on this node
  info->final_flop = nflops*QDP_sites_on_node; 
  info->status = QOP_SUCCESS;
#undef NC
}
コード例 #4
0
ファイル: staples_p.c プロジェクト: usqcd-software/qopqdp
// Accumulate into deriv[] the contributions obtained by propagating chain[]
// backwards through the staple terms described by the index tables (the same
// tables QOP_staples takes).
//
// For every output io, processed from nout-1 down to 0, tc is set to a copy
// of chain[io]; each term s then adds to deriv[tn] and/or deriv[sn]
// (tn = toplinknum[io][s], sn = sidelinknum[io][s]), scaled by coef[io][s]:
//   sidedir == 0 : deriv[tn] += tc
//   sidedir  > 0 : deriv[sn] += tc*Unumu*adj(Umunu)
//                  deriv[sn] += shiftb( adj(tc)*in[sn]*Umunu, mu )
//                  deriv[tn] += shiftb( adj(in[sn])*tc*Unumu, nu )
//   sidedir  < 0 : deriv[tn] += in[sn]*Cmunu*adj(Unumu)
//                  deriv[sn] += in[tn]*Unumu*adj(Cmunu)
//                  deriv[sn] += shiftb( adj(in[tn])*in[sn]*Cmunu, mu )
// with Umunu = getU(tn,mu,nu), Unumu = getU(sn,nu,mu), Cmunu = getC(nu),
// mu = topdir[io][s]-1 and nu = |sidedir[io][s]|-1.
//
// deriv[] is only ever accumulated into (+=); it is not cleared here.
// getU(), getC() and shiftb() are defined outside this function; presumably
// they return neighbor-shifted fields cached in ftmps/ctmps/bt2/bt3, with
// getC() operating on tc -- TODO confirm.
// Elapsed time, flop count and status are reported through info.
//
// topdir = 1..nd
// sidedir = -nd..nd
// toplinknum,sidelinknum = 0..nin-1
void
QOP_staples_deriv(QOP_info_t *info, int nout, int nin,
		  QDP_ColorMatrix *deriv[], QDP_ColorMatrix *chain[],
		  QDP_ColorMatrix *in[],
		  int nstaples[], int *topdir[], int *sidedir[],
		  int *toplinknum[], int *sidelinknum[], QLA_Real *coef[])
{
// NC is not referenced directly in this body; presumably it is consumed by
// the flop-count / QDP macros used below -- TODO confirm.
#define NC QDP_get_nc(in[0])
  double dtime = QOP_time();
  double nflops = 0;
  int nd = QDP_ndim();
  // ftmps/bt2/bt3/ctmps start out NULL and are destroyed at the end only if
  // non-NULL; they are not assigned directly in this body (presumably filled
  // lazily by getU()/getC()/shiftb()).
  QDP_ColorMatrix *ftmps[nin][nd], *t1, *t2, *t3, *t4, *tc, *bt2[nd], *bt3[nd], *ctmps[nd];
  int ctn[nd];
  for(int i=0; i<nin; i++)
    for(int j=0; j<nd; j++)
      ftmps[i][j] = NULL;
  for(int i=0; i<nd; i++) bt2[i] = bt3[i] = ctmps[i] = NULL;
  t1 = QDP_create_M();
  t2 = QDP_create_M();
  t3 = QDP_create_M();
  t4 = QDP_create_M();
  tc = QDP_create_M();

  // process in reverse in case calculated staples used as input for others
  for(int io=nout-1; io>=0; io--) {
    // tc is about to be overwritten: discard any existing ctmps entries and
    // reset the ctn flags for this output
    for(int i=0; i<nd; i++) {
      if(ctmps[i]) QDP_discard_M(ctmps[i]);
      ctn[i] = 0;
    }
    QDP_M_eq_M(tc, chain[io], QDP_all);
    for(int s=0; s<nstaples[io]; s++) {
      QLA_Real c = coef[io][s];
      int tn = toplinknum[io][s];
      int sdir = sidedir[io][s];
      //QOP_printf0("io: %i  s: %i  sdir: %i  tn: %i  c: %g\n", io, s, sdir, tn, c);
      if(sdir==0) {
	// plain link term: deriv[tn] += c * tc (scaling skipped when c==1)
	if(c==1) {
	  QDP_M_peq_M(deriv[tn], tc, QDP_all);
	  nflops += PEQM;
	} else {
	  QDP_M_peq_r_times_M(deriv[tn], &c, tc, QDP_all);
	  nflops += 2*PEQM;
	}
      } else if(sdir>0) {
	int nu = sdir-1;
	int mu = topdir[io][s]-1;
	int sn = sidelinknum[io][s];
	//QOP_printf0("  mu: %i  nu: %i  sn: %i\n", mu, nu, sn);
	QDP_ColorMatrix *Umunu = getU(tn, mu, nu);
	QDP_ColorMatrix *Unumu = getU(sn, nu, mu);
	// t2 = adj(tc) * in[sn] * Umunu, passed through shiftb() along mu
	QDP_M_eq_M_times_M(t1, in[sn], Umunu, QDP_all);
	QDP_M_eq_Ma_times_M(t2, tc, t1, QDP_all);
	QDP_ColorMatrix *tb2 = shiftb(t2, mu);
	// t1 is reused: t1 = tc * Unumu; t3 = adj(in[sn]) * t1, passed through
	// shiftb() along nu
	QDP_M_eq_M_times_M(t1, tc, Unumu, QDP_all);
	QDP_M_eq_Ma_times_M(t3, in[sn], t1, QDP_all);
	QDP_ColorMatrix *tb3 = shiftb(t3, nu);
	if(c==1) {
	  QDP_M_peq_M_times_Ma(deriv[sn], t1, Umunu, QDP_all);
	  QDP_M_peq_M(deriv[sn], tb2, QDP_all);
	  QDP_M_peq_M(deriv[tn], tb3, QDP_all);
	  nflops += 4*EQMTM+PEQMTM+2*PEQM;
	} else {
	  // form the local product in t4 first so it can be scaled by c
	  QDP_M_eq_M_times_Ma(t4, t1, Umunu, QDP_all);
	  QDP_M_peq_r_times_M(deriv[sn], &c, t4, QDP_all);
	  QDP_M_peq_r_times_M(deriv[sn], &c, tb2, QDP_all);
	  QDP_M_peq_r_times_M(deriv[tn], &c, tb3, QDP_all);
	  nflops += 5*EQMTM+6*PEQM;
	}
	// release the shifted fields so t2/t3 can be reused by the next term
	QDP_discard_M(tb2);
	QDP_discard_M(tb3);
      } else {
	int nu = -sdir-1;
	int mu = topdir[io][s]-1;
	int sn = sidelinknum[io][s];
	QDP_ColorMatrix *Cmunu = getC(nu);
	QDP_ColorMatrix *Unumu = getU(sn, nu, mu);
	// t1 = in[sn] * Cmunu; t2 = adj(in[tn]) * t1, passed through shiftb()
	// along mu; t3 = in[tn] * Unumu
	QDP_M_eq_M_times_M(t1, in[sn], Cmunu, QDP_all);
	QDP_M_eq_Ma_times_M(t2, in[tn], t1, QDP_all);
	QDP_ColorMatrix *tb2 = shiftb(t2, mu);
	QDP_M_eq_M_times_M(t3, in[tn], Unumu, QDP_all);
	if(c==1) {
	  QDP_M_peq_M_times_Ma(deriv[tn], t1, Unumu, QDP_all);
	  QDP_M_peq_M_times_Ma(deriv[sn], t3, Cmunu, QDP_all);
	  QDP_M_peq_M(deriv[sn], tb2, QDP_all);
	  nflops += 3*EQMTM+2*PEQMTM+PEQM;
	} else {
	  // t4 is used twice as scratch for the products that need scaling
	  QDP_M_eq_M_times_Ma(t4, t1, Unumu, QDP_all);
	  QDP_M_peq_r_times_M(deriv[tn], &c, t4, QDP_all);
	  QDP_M_eq_M_times_Ma(t4, t3, Cmunu, QDP_all);
	  QDP_M_peq_r_times_M(deriv[sn], &c, t4, QDP_all);
	  QDP_M_peq_r_times_M(deriv[sn], &c, tb2, QDP_all);
	  nflops += 5*EQMTM+6*PEQM;
	}
	// release the shifted field so t2 can be reused by the next term
	QDP_discard_M(tb2);
      }
    }
  }

  // free whichever cached temporaries were actually created
  for(int i=0; i<nin; i++)
    for(int j=0; j<nd; j++)
      if(ftmps[i][j]!=NULL) QDP_destroy_M(ftmps[i][j]);
  for(int i=0; i<nd; i++) {
    if(bt2[i]!=NULL) QDP_destroy_M(bt2[i]);
    if(bt3[i]!=NULL) QDP_destroy_M(bt3[i]);
    if(ctmps[i]!=NULL) QDP_destroy_M(ctmps[i]);
  }
  QDP_destroy_M(t1);
  QDP_destroy_M(t2);
  QDP_destroy_M(t3);
  QDP_destroy_M(t4);
  QDP_destroy_M(tc);
  info->final_sec = QOP_time() - dtime;
  // nflops is counted per site; scale by the number of sites on this node
  info->final_flop = nflops*QDP_sites_on_node; 
  info->status = QOP_SUCCESS;
#undef NC
}
コード例 #5
0
/*
 * Construct the fat links fl[] and long links ll[] from the gauge field
 * gf[] using the path coefficients in coeffs.
 *
 *   fl, ll - outputs; all four fields of each are created here with
 *            QDP_create_M() and are not destroyed by this function
 *   gf     - input gauge links, one field per direction
 *   coeffs - one_link, three_staple, five_staple, seven_staple, lepage and
 *            naik coefficients
 *
 * When LLTIME is defined, timing and mflops figures are printed for the fat
 * and long link stages.
 */
static void 
create_fn_links_qdp(QDP_ColorMatrix *fl[], QDP_ColorMatrix *ll[],
		    QDP_ColorMatrix *gf[], asqtad_path_coeff *coeffs)
{
  QDP_ColorMatrix *staple, *tempmat1;
  QLA_Real one_link, naik;
  int mu, dir, nu, rho, sig;
#ifdef LLTIME
  double nflopfl = 61632;
  double nflopll = 1804;
#endif
  double dtimefl, dtimell;

  /* allocate the output fields and two scratch fields */
  for(mu=0; mu<4; mu++) {
    fl[mu] = QDP_create_M();
    ll[mu] = QDP_create_M();
  }
  staple = QDP_create_M();
  tempmat1 = QDP_create_M();

  dtimefl = -dclock();

  /* to fix up the Lepage term, included by a trick below */
  one_link = coeffs->one_link - 6.0*coeffs->lepage;

  /* fat links: one-link term plus the nested staple terms */
  for(dir=0; dir<4; dir++) {
    QDP_M_eq_r_times_M(fl[dir], &one_link, gf[dir], QDP_all);

    for(nu=0; nu<4; nu++) {
      if(nu==dir) continue;

      /* three-link staple around gf[dir], kept in `staple` for reuse */
      compute_gen_staple(staple, dir, nu, gf[dir],
			 (double)coeffs->three_staple, gf, fl);
      /* Lepage term: same side direction applied to the staple itself */
      compute_gen_staple(NULL, dir, nu, staple, coeffs->lepage, gf, fl);

      for(rho=0; rho<4; rho++) {
	if(rho==dir || rho==nu) continue;

	/* five-link staple built on top of the three-link one */
	compute_gen_staple(tempmat1, dir, rho, staple,
			   (double)coeffs->five_staple, gf, fl);

	for(sig=0; sig<4; sig++) {
	  if(sig==dir || sig==nu || sig==rho) continue;

	  /* seven-link staple built on top of the five-link one */
	  compute_gen_staple(NULL, dir, sig, tempmat1,
			     (double)coeffs->seven_staple, gf, fl);
	} /* sig */
      } /* rho */
    } /* nu */
  } /* dir */

  /* stop the fat-link clock and start the long-link clock */
  dtimell = -dclock();
  dtimefl -= dtimell;
#ifdef LLTIME
  node0_printf("LLTIME(Fat): time = %e (Asqtad opt) mflops = %e\n",dtimefl,
         (Real)nflopfl*volume/(1e6*dtimefl*numnodes()) );
#endif

  /* long links: naik times the product of three consecutive links in the
     same direction, built up with two forward shifts */
  naik = coeffs->naik;
  for(dir=0; dir<4; dir++) {
    QDP_M_eq_sM(staple, gf[dir], QDP_neighbor[dir], QDP_forward, QDP_all);
    QDP_M_eq_M_times_M(tempmat1, gf[dir], staple, QDP_all);
    QDP_discard_M(staple);
    QDP_M_eq_sM(staple, tempmat1, QDP_neighbor[dir], QDP_forward, QDP_all);
    QDP_M_eq_M_times_M(ll[dir], gf[dir], staple, QDP_all);
    QDP_M_eq_r_times_M(ll[dir], &naik, ll[dir], QDP_all);
  }

  dtimell += dclock();
#ifdef LLTIME
  node0_printf("LLTIME(long): time = %e (Asqtad opt) mflops = %e\n",dtimell,
         (Real)nflopll*volume/(1e6*dtimell*numnodes()) );
#endif

  QDP_destroy_M(staple);
  QDP_destroy_M(tempmat1);
}
コード例 #6
0
/* Smearing level i */
/*
 * Accumulate the force for one smearing level by walking a sorted table of
 * paths.
 *
 *   info                      - receives the flop count (final_flop)
 *   gf                        - gauge links used to transport along paths
 *   residues, x, nterms       - not used by this routine
 *   force_accum               - output; cleared here, then accumulated into
 *   force_accum_old           - force from the previous level, used as the
 *                               starting outer product for paths with
 *                               netbackdir < 8
 *   force_accum_naik_old      - same for paths with netbackdir >= 8 (Naik
 *                               paths); may be NULL only if the table
 *                               contains no such paths
 *   internal_num_q_paths      - number of entries in the path table
 *   internal_q_paths_sorted   - the path table
 *   internal_netbackdir_table - net displacement direction for each path
 *
 * Terminates the program with a failure status if a Naik path is found
 * while force_accum_naik_old is NULL.
 */
static void 
QOP_hisq_force_multi_smearing_fnmat(QOP_info_t *info, 
				    QDP_ColorMatrix * gf[4],
				    REAL *residues,
				    QDP_ColorVector *x[], 
				    int nterms, 
				    QDP_ColorMatrix *force_accum[4],
				    QDP_ColorMatrix *force_accum_old[4],
				    QDP_ColorMatrix *force_accum_naik_old[4],
				    int internal_num_q_paths,
				    Q_path *internal_q_paths_sorted,
				    int *internal_netbackdir_table)
{
  int i,j,k,lastdir=-99,ipath,ilink;
  int length,dir,odir;
  REAL coeff;

  QDP_ColorMatrix *tmat;
  QDP_ColorMatrix *oprod_along_path[MAX_PATH_LENGTH+1];
  QDP_ColorMatrix *mats_along_path[MAX_PATH_LENGTH+1];
  QDP_ColorMatrix *mat_tmp0,*mat_tmp1, *stmp[8];

  int netbackdir;
  size_t nflops = 0;

// table of net path displacements (backwards from usual convention)

  Q_path *this_path;	// pointer to current path

  /* Allocate fields */
  for(i=0;i<=MAX_PATH_LENGTH;i++){
    oprod_along_path[i] = QDP_create_M();
  }
  for(i=1;i<=MAX_PATH_LENGTH;i++){ 
    // 0 element is never used (it's unit matrix)
    mats_along_path[i] = QDP_create_M();
  }

  mat_tmp0   = QDP_create_M();
  mat_tmp1   = QDP_create_M();
  for(i=0; i<8; i++) stmp[i] = QDP_create_M();
  tmat       = QDP_create_M();
 
  // clear force accumulators
  for(dir=XUP;dir<=TUP;dir++)
    QDP_M_eq_zero(force_accum[dir], QDP_all);

  // loop over paths, and loop over links in path 
  for( ipath=0; ipath<internal_num_q_paths; ipath++ ){
    this_path = &(internal_q_paths_sorted[ipath]); 
    if(this_path->forwback== -1)continue;	// skip backwards dslash 
    length = this_path->length;
    netbackdir = internal_netbackdir_table[ipath];

    // move f(i-1) force from current site in positive direction,
    //  this corresponds to outer product |X><Y| calculated at the endpoint of the path 
    if( netbackdir<8) { // Not a Naik path
      link_gather_connection_qdp(oprod_along_path[0] , 
				 force_accum_old[OPP_DIR(netbackdir)],
				 tmat, netbackdir );
    }
    else { // Naik path
      if( NULL==force_accum_naik_old ) {
        QOP_printf0( "hisq_force_multi_smearing_fnmat:  mismatch:\n" );
        QOP_printf0( "force_accum_naik_old is NULL, but path table contains Naik paths(!)\n" );
        // fatal inconsistency: report failure (not success) to the caller's
        // environment
        exit(1);
      }
      // CONVERSION FROM 3-LINK DIRECTION TO 1-LINK DIRECTION
      link_gather_connection_qdp(oprod_along_path[0] , 
				 force_accum_naik_old[OPP_DIR(netbackdir-8)],
				 tmat, netbackdir );
    }

    // figure out how much of the outer products along the path must be
    // recomputed. j is last one needing recomputation. k is first one.
    j=length-1; // default is recompute all
    if( GOES_BACKWARDS(this_path->dir[0]) ) k=1; else k=0;

    // transport the outer product backwards along the path, one link at a
    // time, starting from the far end
    for(ilink=j;ilink>=k;ilink--){
      link_transport_connection_qdp( oprod_along_path[length-ilink], 
				     oprod_along_path[length-ilink-1], gf,
				     mat_tmp0, stmp, this_path->dir[ilink]  );
      nflops += 198;
    }

    // maintain an array of transports "to this point" along the path.
    //	Don't recompute beginning parts of path if same as last path 
    ilink=0; // first link where new transport is needed
    // Sometimes we don't need the matrix for the last link
    if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length;

    for( ; ilink<k; ilink++ ){
      if( ilink==0 ){
        dir = this_path->dir[0];
	if( GOES_FORWARDS(dir) ){
	  // first link forwards: adjoint of the link shifted in from the
	  // backward neighbor
	  QDP_M_eq_sM(tmat, gf[dir], QDP_neighbor[dir],
		      QDP_backward, QDP_all);
	  QDP_M_eq_Ma(mats_along_path[1], tmat, QDP_all);
	  QDP_discard_M(tmat);
	}
	else{
	  QDP_M_eq_M(mats_along_path[1], gf[OPP_DIR(dir)], QDP_all);
	}
      }
      else { // ilink != 0
        dir = OPP_DIR(this_path->dir[ilink]);

	link_transport_connection_qdp( mats_along_path[ilink+1], 
				       mats_along_path[ilink], gf,
				       mat_tmp0, stmp, dir );
	nflops += 198;
      }
    } // end loop over links

    // A path has (length+1) points, counting the ends.  At first
    //	 point, no "down" direction links have their momenta "at this
    //	 point". At last, no "up" ... 
    if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length;
    for( ilink=0; ilink<=k; ilink++ ){
      if(ilink<length)dir = this_path->dir[ilink];
      else dir=NODIR;
      // the sign of the path coefficient alternates with position on the path
      coeff = this_path->coeff;
      if( (ilink%2)==1 )coeff = -coeff;
      // add in contribution to the force 
      if( ilink<length && GOES_FORWARDS(dir) ){
	link_gather_connection_qdp(mat_tmp1, 
		       oprod_along_path[length-ilink-1], tmat, dir );
        if(ilink==0) 
	  {
	    QDP_M_eq_M(mat_tmp0,mat_tmp1,QDP_all);
	  }
        else
	  {
	    QDP_M_eq_M_times_Ma(mat_tmp0, mats_along_path[ilink], 
				mat_tmp1, QDP_all);
	    nflops += 198;
	    QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all);
	  }
	QDP_M_peq_r_times_M(force_accum[dir],&coeff,mat_tmp1,QDP_all);
	nflops += 36;
      }
      // the previous link (same path, set at the end of the last iteration)
      // went backwards: its momentum lives at this point
      if( ilink>0 && GOES_BACKWARDS(lastdir) ){
	odir = OPP_DIR(lastdir);
        if( ilink==1 ){
	  QDP_M_eq_M(mat_tmp0,oprod_along_path[length-ilink],QDP_all);
	  QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all);
	}
        else{
	  link_gather_connection_qdp(mat_tmp1, mats_along_path[ilink-1], 
				     tmat, odir );
	  QDP_M_eq_M_times_Ma(mat_tmp0, oprod_along_path[length-ilink], 
			      mat_tmp1, QDP_all);
	  nflops += 198;
	  QDP_M_eq_Ma(mat_tmp1, mat_tmp0, QDP_all);
        }
	QDP_M_peq_r_times_M(force_accum[odir],&coeff,mat_tmp1,QDP_all);
	nflops += 36;
      }
      lastdir = dir;
    } // end loop over links in path //
  } // end loop over paths //

  QDP_destroy_M( mat_tmp0 );
  QDP_destroy_M( mat_tmp1 );
  QDP_destroy_M( tmat );
  for(i=0; i<8; i++) QDP_destroy_M(stmp[i]);
  for(i=0;i<=MAX_PATH_LENGTH;i++){
    QDP_destroy_M( oprod_along_path[i] );
  }
  for(i=1;i<=MAX_PATH_LENGTH;i++){
    QDP_destroy_M( mats_along_path[i] );
  }

  // nflops is counted per site; scale by the number of sites on this node
  info->final_flop = ((double)nflops)*QDP_sites_on_node;

  return;
}//hisq_force_multi_smearing_fnmat