예제 #1
0
/* On a lattice with lattice constant len, along each axis, multiply
   two adjacent smeared links in src to form a coarse link res on a
   lattice s with lattice constant 2 * len.

   res : output, one field per axis; res[j] is written on sub-block s only.
   src : input links, one field per axis.
   s   : sub-block on which the product is evaluated.
   len : displacement (in sites) between the two links being multiplied.

   For each axis j:  res[j] = src[j] * (src[j] shifted forward by len
   along j).  No scratch fields are needed, so none are allocated. */
void RG_create_gauge(QDP_ColorMatrix *res[RG_Nd], 
		     QDP_ColorMatrix *src[RG_Nd], 
		     QDP_Sub_Block s, int len)
{
  int j, k;
  int v[RG_Nd];
  QDP_Shift offset;

  for(j=0; j<RG_Nd; ++j)
    {
      /* On-axis displacement of length len */
      for(k=0; k<RG_Nd; ++k) v[k] = 0;
      v[j] = len;

      /* The shift is only needed for this one product, so it is
         created and released inside the loop. */
      offset = QDP_create_shift(v);
      SQDP_M_eq_M_times_sM(res[j],src[j],src[j],offset,QDP_forward,s);
      QDP_destroy_shift(offset);
    }
}
예제 #2
0
/* Computes the staple :
                 mu
              +-------+
        nu    |       |
              |       |
              X       X
  The mu link ("link") can be any color matrix field.  Both the upper
  staple (built with forward shifts) and the lower staple (built at the
  site below and shifted back along nu) are formed.

  staple : if non-NULL, receives upper + lower staple; if NULL nothing
           is saved.
  fl     : in either case coef times (upper + lower) is added to fl[mu].
*/
static void
compute_gen_staple(QDP_ColorMatrix *staple, int mu, int nu,
		   QDP_ColorMatrix *link, double dcoef,
		   QDP_ColorMatrix *gauge[], QDP_ColorMatrix *fl[])
{
  QLA_Real coef = dcoef;
  const int keep = (staple != NULL);
  QDP_ColorMatrix *shifted  = QDP_create_M();
  QDP_ColorMatrix *nu_ahead = QDP_create_M();
  QDP_ColorMatrix *prod     = QDP_create_M();
  QDP_ColorMatrix *upper    = QDP_create_M();
  QDP_ColorMatrix *lower    = QDP_create_M();
  QDP_ColorMatrix *contrib;

  /* ---- upper staple: gauge[nu] * link(x+nu) * adj(gauge[nu](x+mu)) ---- */
  QDP_M_eq_sM(shifted, link, QDP_neighbor[nu], QDP_forward, QDP_all);
  QDP_M_eq_sM(nu_ahead, gauge[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
  QDP_M_eq_M_times_Ma(prod, shifted, nu_ahead, QDP_all);

  if(keep) {
    /* caller wants the staple: write it straight into the output field */
    QDP_M_eq_M_times_M(staple, gauge[nu], prod, QDP_all);
  } else {
    /* nothing to save: fold the upper staple into the fat link now */
    QDP_M_eq_M_times_M(upper, gauge[nu], prod, QDP_all);
    QDP_M_peq_r_times_M(fl[mu], &coef, upper, QDP_all);
  }

  /* ---- lower staple: built at x, then pulled back along nu ---- */
  QDP_M_eq_sM(shifted, gauge[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
  QDP_M_eq_Ma_times_M(prod, gauge[nu], link, QDP_all);
  QDP_M_eq_M_times_M(lower, prod, shifted, QDP_all);
  QDP_M_eq_sM(shifted, lower, QDP_neighbor[nu], QDP_backward, QDP_all);

  /* Choose what gets added to the fat link: the full saved staple, or
     just the lower half (the upper half was already added above). */
  contrib = shifted;
  if(keep) {
    QDP_M_peq_M(staple, shifted, QDP_all);
    contrib = staple;
  }
  QDP_M_peq_r_times_M(fl[mu], &coef, contrib, QDP_all);

  QDP_destroy_M(shifted);
  QDP_destroy_M(nu_ahead);
  QDP_destroy_M(prod);
  QDP_destroy_M(upper);
  QDP_destroy_M(lower);
} /* compute_gen_staple */
예제 #3
0
/* Smear and project the links in src, writing the result to dest.
 *
 * The weights and the projection flag are obtained from RG_value().
 * By default two smear+project passes are applied; when any of
 * CHECK_SMEAR_QDP_MILC, CHECK_DEGRAND_W_SMEAR or CHECK_SMEAR_GAUGE_2 is
 * defined only a single pass is done.
 */
void RG_smearing(QDP_ColorMatrix *dest[RG_Nd], QDP_ColorMatrix *src[RG_Nd],QDP_Sub_Block s, int len)
{
  int d;
  QLA_Real staple_w, link0;
  QLA_Int space_only;
  QDP_ColorMatrix *sm_link[RG_Nd];
  QDP_ColorMatrix *pr_sm_link[RG_Nd];

  for (d = 0; d < RG_Nd; ++d) {
    sm_link[d] = QDP_create_M();
    pr_sm_link[d] = QDP_create_M();
  }

  RG_value(&staple_w, &link0, &space_only);

  /* First smearing pass (always performed) */
  RG_smearing_qdp(sm_link, src, &staple_w, &link0, s, len);

#if !defined(CHECK_SMEAR_QDP_MILC) && !defined(CHECK_DEGRAND_W_SMEAR) && !defined(CHECK_SMEAR_GAUGE_2)
  /* Second pass: project the first result, then smear it again */
  project_qdp(sm_link, pr_sm_link, &space_only);
  RG_smearing_qdp(sm_link, pr_sm_link, &staple_w, &link0, s, len);
#endif

  /* Final projection into the caller's fields */
  project_qdp(sm_link, dest, &space_only);

  for (d = 0; d < RG_Nd; ++d) {
    QDP_destroy_M(sm_link[d]);
    QDP_destroy_M(pr_sm_link[d]);
  }
}
예제 #4
0
/* Working from the finest lattice, smear the links at each level of
   coarseness and multiply to form the links on the next higher
   level.  Results in rg_link.

   rg_link[nrg-1] is filled from link_qdp on s[nrg]; each pass of the
   loop then fills rg_link[fine-1] from rg_link[fine] with
   fine = nrg-1, nrg-2, ..., 1.

   CHECK_DEGRAND_WO_SMEAR replaces every smearing by a plain copy;
   CHECK_DEGRAND_W_SMEAR overwrites direction 3 with the unsmeared link
   after smearing. */
void RG_gauge(QDP_ColorMatrix *rg_link[NRG][RG_Nd], 
	      QDP_ColorMatrix *link_qdp[RG_Nd], 
	      QDP_Sub_Block s[NRG+1])
{
  int level, dir, len;
  QDP_ColorMatrix *pr_sm_link[RG_Nd];

  for (dir = 0; dir < RG_Nd; ++dir) {
    pr_sm_link[dir] = QDP_create_M();
  }

  /* Finest level: input links -> rg_link[nrg-1] */
#ifdef CHECK_DEGRAND_WO_SMEAR
  for (dir = 0; dir < RG_Nd; ++dir) {
    SQDP_M_eq_M(rg_link[nrg-1][dir], link_qdp[dir], s[nrg]);
  }
#else
  RG_smearing(rg_link[nrg-1], link_qdp, s[nrg], 1);
#ifdef CHECK_DEGRAND_W_SMEAR
  SQDP_M_eq_M(rg_link[nrg-1][3], link_qdp[3], s[nrg]);
#endif
#endif

  /* Work from the finest to the coarsest level */
  for (level = 1; level < nrg; level++) {
    const int fine = nrg - level;   /* index of the level being consumed */

    len = intpow(2, level-1);

    /* Smear the links of the current level */
#ifdef CHECK_DEGRAND_WO_SMEAR
    for (dir = 0; dir < RG_Nd; ++dir) {
      SQDP_M_eq_M(pr_sm_link[dir], rg_link[fine][dir], s[fine+1]);
    }
#else
    RG_smearing(pr_sm_link, rg_link[fine], s[fine+1], len);
#ifdef CHECK_DEGRAND_W_SMEAR
    SQDP_M_eq_M(pr_sm_link[3], rg_link[fine][3], s[fine+1]);
#endif
#endif

    /* Multiply pairs of links to get the next coarser level */
    RG_create_gauge(rg_link[fine-1], pr_sm_link, s[fine], len);
  }

  for (dir = 0; dir < RG_Nd; ++dir) {
    QDP_destroy_M(pr_sm_link[dir]);
  }
}
예제 #5
0
/* Add the HISQ fermion force to force[0..3].
 *
 * The derivative fields are obtained from
 * QOP_hisq_deriv_multi_fnmat2_qdp(); each one is then contracted with the
 * matching flh->U_links[] entry, reduced to its anti-Hermitian part and
 * accumulated into force[].  info->final_flop is incremented and
 * info->final_sec is set to the elapsed time of this call.
 */
void 
QOP_hisq_force_multi_fnmat2_qdp(QOP_info_t *info,  
				QOP_FermionLinksHisq *flh,
				QDP_ColorMatrix *force[], 
				QOP_hisq_coeffs_t *hisq_coeff,
				REAL *residues,
				QDP_ColorVector *x[], 
				int *n_orders_naik)
{
#define NC QDP_get_nc(force[0])
  const double t_start = QOP_time();
  QDP_ColorMatrix *deriv[4];
  QDP_ColorMatrix *scratch;
  int d;

  /* Zeroed accumulators for the derivative */
  for(d=0; d<4; d++) {
    deriv[d] = QDP_create_M();
    QDP_M_eq_zero(deriv[d], QDP_all);
  }
  QOP_hisq_deriv_multi_fnmat2_qdp(info, flh, deriv, hisq_coeff, residues, x, n_orders_naik);

  /* Contract with the link only now, once every smearing level has
     contributed, then add the anti-Hermitian part to the force. */
  scratch = QDP_create_M();
  d = 0;
  while(d < 4) {
    QDP_M_eq_M_times_Ma(scratch, flh->U_links[d], deriv[d], QDP_all);
    QDP_M_eq_antiherm_M(deriv[d], scratch, QDP_all);
    QDP_M_peq_M(force[d], deriv[d], QDP_all);
    d++;
  }
  info->final_flop += (4.*(198+24+18))*QDP_sites_on_node; 

  QDP_destroy_M(scratch);
  for(d=0; d<4; d++) {
    QDP_destroy_M(deriv[d]);
  }

  info->final_sec = QOP_time() - t_start;
#undef NC
}
예제 #6
0
/* Smoke test for the sub-block machinery: fill one field per direction
 * with the constant 1 on QDP_block[nrg], shift each of them forward by one
 * site along its own direction on QDP_block[nrg-1], then release
 * everything and print a message.  Nothing is returned; the routine only
 * exercises the calls.
 */
void RG_check_subset(QDP_Sub_Block QDP_block[NRG+1])
{
  int i;
  QDP_ColorMatrix *link_qdp[RG_Nd],*prova[RG_Nd];
  QLA_Complex unit;

  for(i=0; i< RG_Nd; ++i)
   {
    link_qdp[i] = QDP_create_M();
    prova[i] = QDP_create_M();
   }

  /* Terminate the macro call explicitly so it reads (and parses) as an
     ordinary statement whatever the macro expands to. */
  QLA_c_eq_r(unit,1.0);

  for(i=0; i<RG_Nd; ++i)
    SQDP_M_eq_c(link_qdp[i],&unit,QDP_block[nrg]);

  for(i=0; i<RG_Nd; ++i)
    SQDP_M_eq_sM(prova[i], link_qdp[i], QDP_neighbor[i], QDP_forward, QDP_block[nrg-1]);

  for(i=0; i< RG_Nd; ++i)
   {
    QDP_destroy_M(link_qdp[i]);
    QDP_destroy_M(prova[i]);
   }

  printf("I have destroyed every thing!!! this node %d\n",this_node); fflush(stdout);

  return;
}
예제 #7
0
/* One-time allocation of the solver's work fields.
 *
 * The first call creates every Dirac-fermion temporary and the
 * gaugelink[] fields (8 of them when PRESHIFT_LINKS is defined,
 * otherwise 4); later calls return immediately.
 */
static void
setup_cg(void)
{
  static int is_setup=0;
  int n;

  if(is_setup) return;   /* already allocated */
  is_setup = 1;

  psi = QDP_create_D();
  chi = QDP_create_D();
  cgp = QDP_create_D();
  cgr = QDP_create_D();
  mp  = QDP_create_D();
  ttt = QDP_create_D();
  tt1 = QDP_create_D();
  tt2 = QDP_create_D();
  t1  = QDP_create_D();
  t2  = QDP_create_D();
  t3  = QDP_create_D();

#ifndef PRESHIFT_LINKS
  /* Without pre-shifted links only the four forward links are stored */
  for(n=0; n<4; n++) {
    gaugelink[n] = QDP_create_M();
  }
#endif

  for(n=0; n<8; n++) {
#ifdef PRESHIFT_LINKS
    gaugelink[n] = QDP_create_M();
#endif
    temp1[n] = QDP_create_D();
    temp2[n] = QDP_create_D();
    temp3[n] = QDP_create_D();
    temp4[n] = QDP_create_D();
  }
}
예제 #8
0
/* Return a field holding `in` shifted along direction mu (sense `dir`),
 * cached in *tmp.
 *
 * If *tmp is NULL a new field is created, stored in *tmp and the shift is
 * always performed.  Otherwise the shift is redone only when `redo` is
 * non-zero; with redo == 0 the existing contents of *tmp are returned
 * unchanged.
 */
static QDP_ColorMatrix *
cacheshift(QDP_ColorMatrix **tmp, QDP_ColorMatrix *in, int mu, QDP_ShiftDir dir, int redo)
{
#define NC QDP_get_nc(in)
  if(*tmp == NULL) {
    /* first use: allocate the cache slot and force a fill */
    *tmp = QDP_create_M();
    redo = 1;
  }

  if(redo != 0) {
    QDP_M_eq_sM(*tmp, in, QDP_neighbor[mu], dir, QDP_all);
  }

  return *tmp;
#undef NC
}
예제 #9
0
파일: qopwilson.c 프로젝트: jcosborn/qll
/* Solve the Wilson system with the QOP inverter.
 *
 * l    : layout handed to the pack/unpack helpers.
 * x    : on entry the starting guess, on exit the solution (packed form).
 *        The even-subset part of the guess is zeroed before the solve.
 * u    : gauge field; only u[0], u[2], u[4], u[6] are read.
 * mass : passed through to QOP_wilson_invert_qdp.
 * y    : right-hand side (packed form).
 * rsq  : stopping criterion, stored in res_arg.rsqmin.
 * sub  : sub[0]=='e' selects QOP_EVEN, sub[0]=='o' selects QOP_ODD,
 *        anything else leaves QOP_EVENODD.
 *
 * Every object created here, including the QOP links object, is released
 * before returning.
 */
void
qopWilsonSolve(Layout *l, real *x, real *u[8], real mass, real *y,
	       double rsq, char *sub)
{
  QDP_ColorMatrix *qu[4];
  QDP_DiracFermion *out, *in;
  in = QDP_create_D();
  out = QDP_create_D();
  unpackD(l, in, y);
  unpackD(l, out, x);
  for(int i=0; i<4; i++) {
    qu[i] = QDP_create_M();
    unpackM(l, qu[i], u[2*i]);
    /* links are handed to QOP scaled by 2 (presumably QOP's link
       normalization -- confirm against the QOP Wilson conventions) */
    QLA_Real two = 2;
    QDP_M_eq_r_times_M(qu[i], &two, qu[i], QDP_all);
  }
  QOP_FermionLinksWilson *fla;
  fla = QOP_wilson_create_L_from_qdp(qu, NULL);
  QOP_evenodd_t eo=QOP_EVENODD;
  if(sub[0]=='e') {
    eo = QOP_EVEN;
  }
  if(sub[0]=='o') {
    eo = QOP_ODD;
  }
  QOP_info_t info = QOP_INFO_ZERO;
  QOP_invert_arg_t inv_arg = QOP_INVERT_ARG_DEFAULT;
  QOP_resid_arg_t res_arg = QOP_RESID_ARG_DEFAULT;
  res_arg.rsqmin = rsq;
  inv_arg.max_iter = 1000;
  inv_arg.restart = 500;
  inv_arg.max_restarts = 5;
  inv_arg.evenodd = eo;
  inv_arg.mixed_rsq = 0;

  QDP_D_eq_zero(out, QDP_even);
  QOP_wilson_invert_qdp(&info, fla, &inv_arg, &res_arg, mass, out, in);
  printf0("QOP its: %i\n", res_arg.final_iter);
  packD(l, x, out);

  /* Release the QOP links object; without this each call leaks it. */
  QOP_wilson_destroy_L(fla);
  QDP_destroy_D(in);
  QDP_destroy_D(out);
  for(int i=0; i<4; i++) {
    QDP_destroy_M(qu[i]);
  }
}
예제 #10
0
파일: qopwilson.c 프로젝트: jcosborn/qll
/* Apply the QOP Wilson dslash.
 *
 * l    : layout handed to the pack/unpack helpers.
 * x    : destination (packed form); its current contents are loaded into
 *        the output field before the call and overwritten with the result.
 * u    : gauge field; only u[0], u[2], u[4], u[6] are read.
 * mass : converted to kappa = 0.5/(4+mass).
 * sign : passed through to QOP_wilson_dslash_qdp.
 * y    : source (packed form).
 * sub  : sub[0]=='e' -> output on QOP_EVEN, input on QOP_ODD;
 *        sub[0]=='o' -> output on QOP_ODD, input on QOP_EVEN;
 *        otherwise both are QOP_EVENODD.
 *
 * The squared norm of the output over all sites is printed.  Every object
 * created here, including the QOP links object, is released before
 * returning.
 */
void
qopWilsonDslash(Layout *l, real *x, real *u[8], real mass, int sign,
		real *y, char *sub)
{
  QDP_ColorMatrix *qu[4];
  QDP_DiracFermion *out, *in;
  in = QDP_create_D();
  out = QDP_create_D();
  unpackD(l, in, y);
  unpackD(l, out, x);
  for(int i=0; i<4; i++) {
    qu[i] = QDP_create_M();
    unpackM(l, qu[i], u[2*i]);
    /* links are handed to QOP scaled by 2 (presumably QOP's link
       normalization -- confirm against the QOP Wilson conventions) */
    QLA_Real two = 2;
    QDP_M_eq_r_times_M(qu[i], &two, qu[i], QDP_all);
  }
  QOP_FermionLinksWilson *fla;
  fla = QOP_wilson_create_L_from_qdp(qu, NULL);
  QOP_evenodd_t eoOut=QOP_EVENODD, eoIn=QOP_EVENODD;
  if(sub[0]=='e') {
    eoOut = QOP_EVEN;
    eoIn = QOP_ODD;
  }
  if(sub[0]=='o') {
    eoOut = QOP_ODD;
    eoIn = QOP_EVEN;
  }
  real kappa = 0.5/(4+mass);
  QOP_wilson_dslash_qdp(NULL, fla, kappa, sign, out, in, eoOut, eoIn);
  QLA_Real n2;
  QDP_r_eq_norm2_D(&n2, out, QDP_all);
  printf0("out2: %g\n", n2);
  packD(l, x, out);

  /* Release the QOP links object; without this each call leaks it. */
  QOP_wilson_destroy_L(fla);
  QDP_destroy_D(in);
  QDP_destroy_D(out);
  for(int i=0; i<4; i++) {
    QDP_destroy_M(qu[i]);
  }
}
예제 #11
0
/* Smear the link in direction dir on sub-block s:
 *
 *     sm_link = w_l * link[dir] + w_s * (sum of staples)
 *
 * The staples are built with displacements of length len.  For every
 * direction nu != dir (restricted to nu < 3 when CHECK_SMEAR_QDP_MILC is
 * defined) one forward and one backward staple are added to the sum.
 */
void RG_smear_dir (QDP_ColorMatrix *sm_link, 
		   QDP_ColorMatrix *link[], 
		   QLA_Real w_l, QLA_Real w_s, 
		   QLA_Int dir, QDP_Sub_Block s, int len)
{
  const QLA_Int mu = dir;
  QLA_Int nu;
  int a, ndirs;
  int disp[RG_Nd];
  QDP_Shift hop[RG_Nd];
  QDP_ColorMatrix *work, *nu_ahead, *prod, *staples, *pulled, *base;

  work     = QDP_create_M();
  nu_ahead = QDP_create_M();
  prod     = QDP_create_M();
  staples  = QDP_create_M();
  pulled   = QDP_create_M();
  base     = QDP_create_M();

  /* One shift of length len per axis */
  for(nu = 0; nu < RG_Nd; nu++) {
    for(a = 0; a < RG_Nd; a++) disp[a] = 0;
    disp[nu] = len;
    hop[nu] = QDP_create_shift(disp);
  }

  /* base = w_l * U_mu ; staple accumulator starts at zero */
  SQDP_M_eq_r_times_M(base, &w_l, link[mu], s);
  SQDP_M_eq_zero(staples, s);

#ifdef CHECK_SMEAR_QDP_MILC
  ndirs = 3;
#else
  ndirs = RG_Nd;
#endif

  /* Sum on staples */
  for(nu = 0; nu < ndirs; nu++) {
    if(nu == mu) continue;

    /* Forward staple: U_nu(x) * U_mu(x+nu) * adj(U_nu(x+mu)) */
    SQDP_M_eq_sM(work, link[mu], hop[nu], QDP_forward, s);
    SQDP_M_eq_sM(nu_ahead, link[nu], hop[mu], QDP_forward, s);
    SQDP_M_eq_M_times_Ma(prod, work, nu_ahead, s);
    SQDP_M_peq_M_times_M(staples, link[nu], prod, s);

    /* Backward staple: adj(U_nu) * U_mu * U_nu(x+mu), pulled back along nu */
    SQDP_M_eq_M_times_M(prod, link[mu], nu_ahead, s);
    SQDP_M_eq_Ma_times_M(work, link[nu], prod, s);
    SQDP_M_eq_sM(pulled, work, hop[nu], QDP_backward, s);
    SQDP_M_peq_M(staples, pulled, s);
  }

  /* U_smeared = w_l * U + w_s * U_staple */
  SQDP_M_eq_r_times_M_plus_M(sm_link, &w_s, staples, base, s);

  QDP_destroy_M(work);
  QDP_destroy_M(nu_ahead);
  QDP_destroy_M(prod);
  QDP_destroy_M(staples);
  QDP_destroy_M(pulled);
  QDP_destroy_M(base);

  for(nu = 0; nu < RG_Nd; nu++) {
    QDP_destroy_shift(hop[nu]);
  }
}
/* Gauge force built from plaquette, rectangle and parallelogram terms.
 *
 * For every direction mu the staples attached to the mu link are summed
 * into a scratch field tempmom_qdp[mu], weighted by coeffs->plaquette,
 * coeffs->rectangle and coeffs->parallelogram.  At the end, for each mu,
 *     tmp          = fblink[mu] * adj(tempmom_qdp[mu])
 *     force[mu]    = antiherm( eb3 * tmp + force[mu] ),   eb3 = -eps/3
 * so force->force[] is both read and overwritten.
 *
 * info   : receives final_sec, final_flop and status (QOP_SUCCESS).
 * gauge  : gauge->links[] supplies the forward links.
 * force  : force->force[0..3], updated in place.
 * coeffs : the three loop weights.
 * eps    : enters only through eb3.
 *
 * fblink[] is not declared in this function (presumably a file-scope array
 * of 8 link pointers -- confirm).  Entries 0..ndim-1 alias gauge->links;
 * the OPP_DIR() entries are allocated here and destroyed before returning.
 */
void 
QOPPC(symanzik_1loop_gauge_force1) (QOP_info_t *info, QOP_GaugeField *gauge, 
		   QOP_Force *force, QOP_gauge_coeffs_t *coeffs, REAL eps)
{
  REAL Plaq, Rect, Pgm ;
  QDP_ColorMatrix *tempmom_qdp[4];
  QDP_ColorMatrix *Amu[6]; // products of 2 links Unu(x)*Umu(x+nu)
  QDP_ColorMatrix *tmpmat;
  QDP_ColorMatrix *tmpmat1;
  QDP_ColorMatrix *tmpmat2;
  QDP_ColorMatrix *staples;
  QDP_ColorMatrix *tmpmat3;
  QDP_ColorMatrix *tmpmat4;

  int i, k;
  int mu, nu, sig;
  double dtime;
  //REAL eb3 = -eps*beta/3.0;
  REAL eb3 = -eps/3.0;
  // j[i][k]: for the i-th direction nu != mu, the Amu[] slot of the k-th
  // remaining direction sig (sig != mu, sig != nu)
  int j[3][2] = {{1,2},
                 {0,2},
                 {0,1}};
  
  //  QOP_printf0("beta: %e, eb3: %e\n", beta, eb3);
  // dtime starts negative so that adding QOP_time() later gives elapsed time
  dtime = -QOP_time();

  // zeroed per-direction accumulators
  for(mu=0; mu<4; mu++) {
    tempmom_qdp[mu] = QDP_create_M();
    QDP_M_eq_zero(tempmom_qdp[mu], QDP_all);
  }

  // forward links are aliased; backward links are adj(U_i(x-i)), stored
  // in freshly created fields
  tmpmat = QDP_create_M();
  for(i=0; i<QOP_common.ndim; i++) {
    fblink[i] = gauge->links[i];
    fblink[OPP_DIR(i)] = QDP_create_M();
    QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all);
    QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all);
  }
  

  for(i=0; i<6; i++) {
    Amu[i] = QDP_create_M();
  }

  staples = QDP_create_M();
  tmpmat1 = QDP_create_M();
  tmpmat2 = QDP_create_M();
  tmpmat3 = QDP_create_M();
  tmpmat4 = QDP_create_M();

  Plaq = coeffs->plaquette;
  Rect = coeffs->rectangle;
  Pgm  = coeffs->parallelogram;

  //Construct 3-staples and rectangles
  for(mu=0; mu<4; mu++) {
    // i counts the directions nu != mu seen so far (0..2); Amu[i] holds the
    // forward two-link product for that nu and Amu[i+3] the backward one
    i=0;
    for(nu=0; nu<4; nu++) {
      if(nu!=mu){
	// tmpmat1 = Umu(x+nu)
	QDP_M_eq_sM(tmpmat1, fblink[mu], QDP_neighbor[nu], QDP_forward, QDP_all); 
        QDP_M_eq_M_times_M(Amu[i], fblink[nu], tmpmat1, QDP_all);

        //tmpmat2 = Umu(x-nu)
	QDP_M_eq_sM(tmpmat2, fblink[mu], QDP_neighbor[nu], QDP_backward, QDP_all);
        QDP_M_eq_M_times_M(Amu[i+3], fblink[OPP_DIR(nu)], tmpmat2, QDP_all);
       

 
	//tmpmat = U_{nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_Ma(staples, Amu[i], tmpmat, QDP_all);        
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Plaq, staples, QDP_all);
 
        //tmpmat = U_{-nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_Ma_times_M(tmpmat3, fblink[OPP_DIR(nu)], staples, QDP_all);
        QDP_M_eq_M_times_M(tmpmat4, tmpmat3, tmpmat, QDP_all);
        QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_forward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Rect, tmpmat, QDP_all);

        // the next two rectangle pieces are credited to direction nu, not mu
        QDP_M_eq_Ma_times_M(tmpmat4, tmpmat2, tmpmat3, QDP_all);
        QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_forward, QDP_all);
        QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[mu], QDP_backward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat3, QDP_all);

        //tmpmat = U_{-nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_Ma(tmpmat3, tmpmat2, tmpmat, QDP_all);
        QDP_M_eq_M_times_Ma(tmpmat, tmpmat3, staples, QDP_all);        
        QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[nu], QDP_forward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat3, QDP_all);




        // same sequence again, now starting from the backward product Amu[i+3]
        //tmpmat = U_{-nu}(x+mu) 
        QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_Ma(staples, Amu[i+3], tmpmat, QDP_all);        
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Plaq, staples, QDP_all);

        QDP_M_eq_Ma_times_M(tmpmat3, fblink[nu], staples, QDP_all);
        QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_M(tmpmat4, tmpmat3, tmpmat, QDP_all);
        QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_backward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Rect, tmpmat, QDP_all);

        QDP_M_eq_Ma_times_M(tmpmat, tmpmat3, tmpmat1, QDP_all);
        QDP_M_eq_sM(tmpmat4, tmpmat, QDP_neighbor[mu], QDP_backward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat4, QDP_all);

        QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_M(tmpmat3, staples, tmpmat, QDP_all);
        QDP_M_eq_M_times_Ma(tmpmat4, tmpmat3, tmpmat1, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat4, QDP_all);
        i++;
      }
      
    }

    // Construct the  pgm staples and add them to force
    // (staples is reused here as the accumulator for all pgm terms of this mu)
    QDP_M_eq_zero(staples, QDP_all);
    i=0;
    for(nu=0; nu<4; nu++){
      if(nu!=mu){
        k=0;
	for(sig=0; sig<4;sig ++){
	  if(sig!=mu && nu!=sig){
	    
	    // the nu_sig_mu ... staple and 3 reflections
            //tmpmat = Amu["sig"](x+nu)
	    QDP_M_eq_sM(tmpmat, Amu[j[i][k]], QDP_neighbor[nu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["sig"](x+nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[nu], tmpmat, QDP_all);   
            //tmpmat3 = Unu(x+mu+sig)
            QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
	    QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_forward, QDP_all); // HERE?
            //tmpmat2 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig))
	    QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = Usig(x+mu)
	    QDP_M_eq_sM(tmpmat, fblink[sig], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig))*adj(Usig(x+mu))
	    QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);

	    QDP_M_peq_M(staples, tmpmat1, QDP_all);


            //tmpmat = Amu["sig"](x-nu)
	    QDP_M_eq_sM(tmpmat, Amu[j[i][k]], QDP_neighbor[nu], QDP_backward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["sig"](x-nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[OPP_DIR(nu)], tmpmat, QDP_all);   
            //tmpmat3 = U_{-nu}(x+mu+sig)
            QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
	    QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_forward, QDP_all); // HERE?
            //tmpmat2 = U_{-nu}nu(x)*Amu["sig"](x-nu)*adj(Unu(x+mu+sig))
	    QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = Usig(x+mu)
	    QDP_M_eq_sM(tmpmat, fblink[sig], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["sig"](x-nu)*adj(Unu(x+mu+sig))*adj(Usig(x+mu))
	    QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);

	    QDP_M_peq_M(staples, tmpmat1, QDP_all);


            //tmpmat = Amu["-sig"](x-nu)
	    QDP_M_eq_sM(tmpmat, Amu[j[i][k]+3], QDP_neighbor[nu], QDP_backward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["-sig"](x-nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[OPP_DIR(nu)], tmpmat, QDP_all);   
            //tmpmat = U_{-nu}(x+mu-sig)
            QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
	    QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_backward, QDP_all); // HERE?
            //tmpmat2 = U_{-nu}nu(x)*Amu["-sig"](x-nu)*adj(Unu(x+mu-sig))
	    QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = U_{-sig}(x+mu)
	    QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(sig)], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["-sig"](x-nu)*adj(Unu(x+mu-sig))*adj(U_{-sig}(x+mu))
	    QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);

	    QDP_M_peq_M(staples, tmpmat1, QDP_all);

            


            //tmpmat = Amu["-sig"](x+nu)
	    QDP_M_eq_sM(tmpmat, Amu[j[i][k]+3], QDP_neighbor[nu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["-sig"](x+nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[nu], tmpmat, QDP_all);   
            //tmpmat3 = Unu(x+mu-sig)
            QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
	    QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_backward, QDP_all); // HERE?
            //tmpmat2 = Unu(x)*Amu["-sig"](x+nu)*adj(Unu(x+mu-sig))
	    QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = U_{-sig}(x+mu)
	    QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(sig)], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["-sig"](x+nu)*adj(Unu(x+mu-sig))*adj(U_{-sig}(x+mu))
	    QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);

	    QDP_M_peq_M(staples, tmpmat1, QDP_all);

	    k++;
	  }//close if sig!=nu ...
	}//close sig loop
	i++;
      }// close if nu!=mu
    }//close the pgm nu loop

    QDP_M_peq_r_times_M(tempmom_qdp[mu], &Pgm, staples, QDP_all);
   

    
  }// closes the mu loop

#ifdef CHKSUM
  QLA_ColorMatrix qcm;
  QLA_Complex det, chk;
  QLA_c_eq_r(chk, 0);
#endif
  // close each accumulated staple sum with its link, scale by eb3, add the
  // incoming force and keep only the anti-Hermitian part
  for(mu=0; mu<4; mu++){
    QDP_M_eq_M_times_Ma(tmpmat, fblink[mu], tempmom_qdp[mu], QDP_all); // HERE?
    QDP_M_eq_r_times_M_plus_M( tempmom_qdp[mu], &eb3, tmpmat, force->force[mu], QDP_all);// HERE?
    QDP_M_eq_antiherm_M(force->force[mu], tempmom_qdp[mu], QDP_all);// HERE
#ifdef CHKSUM
    QDP_m_eq_sum_M(&qcm, force->force[mu], QDP_all);
    QLA_C_eq_det_M(&det, &qcm);
    QLA_c_peq_c(chk, det);
#endif
  }
#ifdef CHKSUM
  QOP_printf0("chksum: %g %g\n", QLA_real(chk), QLA_imag(chk));
#endif

  //DESTROY various fields

  QDP_destroy_M(tmpmat);
  QDP_destroy_M(tmpmat1);
  QDP_destroy_M(tmpmat2);
  QDP_destroy_M(tmpmat3);
  QDP_destroy_M(staples);
  QDP_destroy_M(tmpmat4);

  for(mu=0; mu<4; mu++){
    QDP_destroy_M(tempmom_qdp[mu]);
  }
  for(i=0; i<6; i++) {
    QDP_destroy_M(Amu[i]);
  }

  // NOTE(review): releases fblink[4..7]; this matches the fields created
  // above only if OPP_DIR() maps 0..3 onto 4..7 and QOP_common.ndim is 4
  // -- confirm.
  for(i=4; i<8; i++) {
    QDP_destroy_M(fblink[i]);
  }

  dtime += QOP_time();

  // fixed per-site flop count reported to the caller
  double nflop = 96720;
  info->final_sec = dtime;
  info->final_flop = nflop*QDP_sites_on_node; 
  info->status = QOP_SUCCESS;
  //QOP_printf0("Time in slow g_force: %e\n", info->final_sec);
} 
예제 #13
0
/* Build the RG_Ncn path-averaged links on sub-block s and project them.
 *
 * pr_wlink : output, receives project_qdp() applied to the averaged paths.
 * link_qdp : input links, one per direction.
 * s        : sub-block on which every product is evaluated.
 * len      : length passed to create_shift() for each step.
 *
 * Products of 1, 2, 3 and 4 links along mutually distinct directions are
 * formed (4, 12, 24 and 24 ordered paths respectively).  Slot 0 is the
 * unit matrix, slots 1..4 are the single links, and each slot i >= 5
 * collects every multi-link path whose shift record has rv == i, weighted
 * by 1/2, 1/6 or 1/24 for 2-, 3- and 4-link paths.
 *
 * NOTE(review): the malloc results are used unchecked, and only the first
 * entries of x[] are set before each create_shift/find_count call
 * (presumably those helpers read just the leading `n` entries given as
 * their second/third argument -- confirm).
 */
void RG_create_path(QDP_ColorMatrix *pr_wlink[RG_Ncn], QDP_ColorMatrix *link_qdp[RG_Nd], QDP_Sub_Block s,int len)
{
  int i,j,k,t,x[4];
  int count,c2,space_only;
  QDP_ColorMatrix *path_1[4];
  QDP_ColorMatrix *path_2[12];
  QDP_ColorMatrix *path_3[24];
  QDP_ColorMatrix *path_4[24];
  QDP_ColorMatrix *wlink[RG_Ncn];
  QDP_Shift offset;
  shift_v *d1,*d2,*d3,*d4;
  QLA_Real c = 1.0;
  QLA_Real fact2 = 1.0/2.0;
  QLA_Real fact3 = 1.0/6.0;
  QLA_Real fact4 = 1.0/24.0;
  QLA_Complex unit;

  /* Shift records, one per ordered path of each length */
  d1 = (shift_v *) malloc(4*sizeof(shift_v));
  d2 = (shift_v *) malloc(12*sizeof(shift_v));
  d3 = (shift_v *) malloc(24*sizeof(shift_v));
  d4 = (shift_v *) malloc(24*sizeof(shift_v));

  for (i = 0; i < RG_Ncn; i++)
    wlink[i] = QDP_create_M();

  /* One-link paths are just copies of the input links */
  for (i = 0; i < 4; i++)
    path_1[i] = QDP_create_M();
  for (i = 0; i < 4; i++)
    SQDP_M_eq_M(path_1[i],link_qdp[i],s);

  for (i = 0; i < 12; i++)
    path_2[i] = QDP_create_M();

  for (i = 0; i < 24; i++)
    {
      path_3[i] = QDP_create_M();
      path_4[i] = QDP_create_M();
    }

  /* Shift records for the one-link paths */
  for (i = 0; i < RG_Nd; i++)
    {
      x[0] = i;
      d1[i] = create_shift(x,1,len);
    }

  /* Two-link paths: path_1 along i, followed by link j taken at the end
     point of that first step */
  count = 0;
  for (i = 0; i < RG_Nd; i++)
    {
      x[0] = i;
      c2 = find_count(d1,x,1);
      offset = QDP_create_shift(d1[c2].s);
      for (j = 0; j < RG_Nd ; j++) if ( j != i)
	{
	  x[1] = j;
	  d2[count] = create_shift(x,2,len);
	  SQDP_M_eq_M_times_sM(path_2[count],path_1[c2],link_qdp[j],offset,QDP_forward,s);
	  count ++;
	}
      QDP_destroy_shift(offset);
    }

  /* Three-link paths: extend every two-link path by a third direction */
  count = 0;
  for (i = 0; i < RG_Nd; i++)
    for (j = 0; j < RG_Nd; j++) if (j != i)
      {
	x[0] = i; x[1] = j;
	c2 = find_count(d2,x,2);
	offset = QDP_create_shift(d2[c2].s);
	for (k = 0; k < RG_Nd; k++) if (k != i) if (k != j)
	  {
	    x[2] = k;
	    d3[count] = create_shift(x,3,len);
	    SQDP_M_eq_M_times_sM(path_3[count],path_2[c2],link_qdp[k],offset,QDP_forward,s);
	    count++;
	  }
	QDP_destroy_shift(offset);
      }

  /* Four-link paths: extend every three-link path by the last direction */
  count = 0;
  for (i = 0; i < RG_Nd; i++)
    for (j = 0; j < RG_Nd; j++) if (j != i)
      for (k = 0; k < RG_Nd; k++) if (k != i) if (k != j)
	{
	  x[0] = i; x[1] = j; x[2] = k;
	  c2 = find_count(d3,x,3);
	  offset = QDP_create_shift(d3[c2].s);
	  for (t = 0; t < RG_Nd; t++) if (t != i) if (t != j) if (t != k)
	    {
	      x[3] = t;
	      d4[count] = create_shift(x,4,len);
	      SQDP_M_eq_M_times_sM(path_4[count],path_3[c2],link_qdp[t],offset,QDP_forward,s);
	      count++;
	    }
	  QDP_destroy_shift(offset);
	}

  /* Slot 0: unit matrix */
  QLA_C_eq_R(&unit,&c);
  SQDP_M_eq_c(wlink[0],&unit,s);

  /* Slots 1..4: the single links */
  for (i=1;i<5;i++)
    SQDP_M_eq_M(wlink[i],path_1[i-1],s);

  /* Remaining slots: weighted sum of every multi-link path tagged with
     this slot index */
  for (i=5;i<RG_Ncn;i++)
    {
      SQDP_M_eq_zero(wlink[i],s);

      for (j=0;j<12;j++) if(d2[j].rv == i)
	SQDP_M_peq_r_times_M(wlink[i],&fact2,path_2[j],s);

      for (j=0;j<24;j++) if(d3[j].rv == i)
	SQDP_M_peq_r_times_M(wlink[i],&fact3,path_3[j],s);

      /* The four-link paths selected through d4 live in path_4;
         summing path_3 here would leave path_4 unused and add the
         wrong matrices. */
      for (j=0;j<24;j++) if(d4[j].rv == i)
	SQDP_M_peq_r_times_M(wlink[i],&fact4,path_4[j],s);
    }

  space_only = RG_Ncn;
  project_qdp(wlink, pr_wlink,&space_only);

  for (i = 0; i < 4; i++)
    QDP_destroy_M(path_1[i]);

  for (i = 0; i < 12; i++)
    QDP_destroy_M(path_2[i]);

  for (i = 0; i < 24; i++)
    {
      QDP_destroy_M(path_3[i]);
      QDP_destroy_M(path_4[i]);
    }

  for (i = 0; i < RG_Ncn; i++)
    QDP_destroy_M(wlink[i]);

  free(d1);
  free(d2);
  free(d3);
  free(d4);

  return;
}
예제 #14
0
파일: qopwilson.c 프로젝트: jcosborn/qll
/* Multi-mass Wilson solve with the QOP inverter.
 *
 * l       : layout handed to the pack/unpack helpers.
 * x       : nmasses packed fields; starting guesses in, solutions out.
 *           The even-subset part of each guess is zeroed before the solve.
 * u       : gauge field; only u[0], u[2], u[4], u[6] are read.
 * masses  : nmasses mass values (copied to `real` for QOP).
 * y       : right-hand side (packed form), shared by all masses.
 * nmasses : number of masses; nothing is done when it is <= 0.
 * rsq     : stopping criterion, stored in the single residual-argument
 *           record that all masses share.
 * sub     : currently ignored -- the solve always uses QOP_EVEN (the
 *           selection code is disabled with #if 0).
 *
 * Every object created here, including the QOP links object, is released
 * before returning.
 */
void
qopWilsonSolveMulti(Layout *l, real *x[], real *u[8], double masses[],
		    real *y, int nmasses, double rsq, char *sub)
{
  /* The arrays below are variable-length; a non-positive length would be
     undefined behavior, and there is nothing to solve anyway. */
  if(nmasses <= 0) return;

  QDP_ColorMatrix *qu[4];
  QDP_DiracFermion *out[nmasses], *in, **outp;
  outp = out;
  in = QDP_create_D();
  unpackD(l, in, y);
  for(int i=0; i<nmasses; i++) {
    out[i] = QDP_create_D();
    unpackD(l, out[i], x[i]);
    QDP_D_eq_zero(out[i], QDP_even);
  }
  for(int i=0; i<4; i++) {
    qu[i] = QDP_create_M();
    unpackM(l, qu[i], u[2*i]);
    /* links are handed to QOP scaled by 2 (presumably QOP's link
       normalization -- confirm against the QOP Wilson conventions) */
    QLA_Real two = 2;
    QDP_M_eq_r_times_M(qu[i], &two, qu[i], QDP_all);
  }
  QOP_FermionLinksWilson *fla;
  fla = QOP_wilson_create_L_from_qdp(qu, NULL);
#if 0
  QOP_evenodd_t eo = QOP_EVENODD;
  if(sub[0]=='e') {
    eo = QOP_EVEN;
  }
  if(sub[0]=='o') {
    eo = QOP_ODD;
  }
#endif
  QOP_evenodd_t eo = QOP_EVEN;
  QOP_info_t info = QOP_INFO_ZERO;
  QOP_invert_arg_t inv_arg = QOP_INVERT_ARG_DEFAULT;
  inv_arg.max_iter = 1000;
  inv_arg.restart = 500;
  inv_arg.max_restarts = 5;
  inv_arg.evenodd = eo;
  inv_arg.mixed_rsq = 0;
  QOP_resid_arg_t res_arg = QOP_RESID_ARG_DEFAULT;
  res_arg.rsqmin = rsq;
  /* every mass points at the same residual record */
  QOP_resid_arg_t *ra[nmasses];
  QOP_resid_arg_t **rap = ra;
  real mf[nmasses], *mfp;
  mfp = mf;
  for(int i=0; i<nmasses; i++) {
    ra[i] = &res_arg;
    mf[i] = masses[i];
  }

  /* one source (last argument = 1) with nmasses shifts */
  QOP_wilson_invert_multi_qdp(&info, fla, &inv_arg, &rap,
			      &mfp, &nmasses, &outp, &in, 1);
  printf0("QOP its: %i\n", res_arg.final_iter);

  /* Release the QOP links object; without this each call leaks it. */
  QOP_wilson_destroy_L(fla);
  QDP_destroy_D(in);
  for(int i=0; i<nmasses; i++) {
    packD(l, x[i], out[i]);
    QDP_destroy_D(out[i]);
  }
  for(int i=0; i<4; i++) {
    QDP_destroy_M(qu[i]);
  }
}
예제 #15
0
/* Smearing level 0.
 *
 * Fills force_accum[dir] and force_accum_naik[dir] (dir = XUP..TUP) from
 * the nterms vectors in x[] and their weights residues[].
 *
 * For each direction, the outer product of x[term] with x[term] shifted
 * along fnshift(OPP_DIR(dir)) (one-link part) or
 * fnshift(OPP_3_DIR(DIR3(dir))) (Naik part) is summed over terms with
 * weight residues[term]; the sum is passed through
 * link_gather_connection_qdp() and added to the accumulator with
 * coefficient 1.
 *
 * Both accumulators are zeroed first.  When nterms == 0 the routine
 * returns at once and touches neither the accumulators nor info.
 * Otherwise info->final_flop is set to the counted flops per site times
 * QDP_sites_on_node.
 */
static void 
QOP_hisq_force_multi_smearing0_fnmat(QOP_info_t *info,  
				     REAL *residues,
				     QDP_ColorVector *x[], 
				     int nterms, 
				     QDP_ColorMatrix *force_accum[4],
				     QDP_ColorMatrix *force_accum_naik[4])
{
  int term;
  int i,k;
  int dir;
  REAL coeff;

  QDP_ColorMatrix *tmat;
  QDP_ColorMatrix *oprod_along_path[MAX_PATH_LENGTH+1];
  QDP_ColorMatrix *mat_tmp0;
  QDP_ColorVector *tsrc[2], *vec_tmp[2];
  size_t nflops = 0;

  if( nterms==0 )return;

  // NOTE(review): mat_tmp0 is created and destroyed but never used below,
  // and only oprod_along_path[0] and [1] are referenced.
  mat_tmp0   = QDP_create_M();
  tmat       = QDP_create_M();
  tsrc[0] = QDP_create_V();
  tsrc[1] = QDP_create_V();
  vec_tmp[0] = QDP_create_V();
  vec_tmp[1] = QDP_create_V();

  for(i=0;i<=MAX_PATH_LENGTH;i++){
    oprod_along_path[i] = QDP_create_M();
  }

  // clear force accumulators
  
  for(dir=XUP;dir<=TUP;dir++)
    QDP_M_eq_zero(force_accum[dir], QDP_all);

  for(dir=XUP;dir<=TUP;dir++){ //AB loop on directions, path table is not needed
    k=0; // which vec_tmp we are using (0 or 1)
    // prime buffer k with the first vector and start its shift
    QDP_V_eq_V(tsrc[k], x[0], QDP_all);
    QDP_V_eq_sV(vec_tmp[k], tsrc[k], 
		fnshift(OPP_DIR(dir)), fndir(OPP_DIR(dir)), QDP_all);
    QDP_M_eq_zero(oprod_along_path[0], QDP_all);

    for(term=0;term<nterms;term++){
      // start the shift for the next term in the other buffer before the
      // current one is consumed
      if(term<nterms-1) {
	QDP_V_eq_V(tsrc[1-k], x[term+1], QDP_all);
	QDP_V_eq_sV(vec_tmp[1-k], tsrc[1-k], 
		    fnshift(OPP_DIR(dir)), fndir(OPP_DIR(dir)), QDP_all);
      }
      //QDP_M_eq_V_times_Va(tmat, x[term], vec_tmp[k], QDP_all);
      QDP_M_eq_V_times_Va(tmat, tsrc[k], vec_tmp[k], QDP_all);
      nflops += 54;
      QDP_discard_V(vec_tmp[k]);
      QDP_M_peq_r_times_M(oprod_along_path[0], &residues[term], tmat, 
			  QDP_all);
      nflops += 36;
      
      k=1-k; // swap 0 and 1
    } // end loop over terms in rational function expansion 

    // tmat is passed as the third argument here (presumably scratch space
    // for the helper -- confirm)
    link_gather_connection_qdp(oprod_along_path[1], oprod_along_path[0], tmat,
			       dir );
    coeff = 1.;
    QDP_M_peq_r_times_M(force_accum[dir],&coeff,oprod_along_path[1],QDP_all);
    nflops += 36;

  } // end of loop on directions //


  // *** Naik part *** /
  // same pipeline as above, with the OPP_3_DIR(DIR3(dir)) shift
  
  // clear force accumulators
  for(dir=XUP;dir<=TUP;dir++)
    QDP_M_eq_zero(force_accum_naik[dir], QDP_all);


  for(dir=XUP;dir<=TUP;dir++){ //AB loop on directions, path table is not needed
    k=0; // which vec_tmp we are using (0 or 1)
    QDP_V_eq_V(tsrc[k], x[0], QDP_all);
    QDP_V_eq_sV(vec_tmp[k], tsrc[k], fnshift(OPP_3_DIR( DIR3(dir) )), 
		fndir(OPP_3_DIR( DIR3(dir) )), QDP_all);

    QDP_M_eq_zero(oprod_along_path[0], QDP_all);

    for(term=0;term<nterms;term++){
      if(term<nterms-1) {
	QDP_V_eq_V(tsrc[1-k], x[term+1], QDP_all);
	QDP_V_eq_sV(vec_tmp[1-k], tsrc[1-k], fnshift(OPP_3_DIR( DIR3(dir) )), 
		    fndir(OPP_3_DIR( DIR3(dir) )), QDP_all);
      }
      //QDP_M_eq_V_times_Va(tmat, x[term], vec_tmp[k], QDP_all);
      QDP_M_eq_V_times_Va(tmat, tsrc[k], vec_tmp[k], QDP_all);
      nflops += 54;
      QDP_discard_V(vec_tmp[k]);
      QDP_M_peq_r_times_M(oprod_along_path[0], &residues[term], tmat, QDP_all);
      nflops += 36;

      k=1-k; // swap 0 and 1
    } // end loop over terms in rational function expansion 

    link_gather_connection_qdp(oprod_along_path[1], oprod_along_path[0], tmat, 
			       DIR3(dir) );
    coeff = 1; // fermion_eps is outside this routine in "wrapper" routine
    QDP_M_peq_r_times_M(force_accum_naik[dir],&coeff,
			oprod_along_path[1],QDP_all);
    nflops += 36;
  } // end of loop on directions 

  QDP_destroy_V( tsrc[0] );
  QDP_destroy_V( tsrc[1] );
  QDP_destroy_V( vec_tmp[0] );
  QDP_destroy_V( vec_tmp[1] );
  QDP_destroy_M( mat_tmp0 );
  QDP_destroy_M( tmat );
  for(i=0;i<=MAX_PATH_LENGTH;i++){
    QDP_destroy_M( oprod_along_path[i] );
  }

  // overwrites (does not add to) any previous flop count in info
  info->final_flop = ((double)nflops)*QDP_sites_on_node;
  return;
} //hisq_force_multi_smearing0_fnmat
예제 #16
0
/* Build the Asqtad fat and long links from the site gauge field with QDP
   and copy them into the MILC-ordered arrays held by fn.

   both  nonzero: also allocate (if needed) and fill the long links
   fn    link container; fn->fat / fn->lng are allocated here when they
         are NULL, and fn->valid is set to 1 before returning
   ap    supplies act_path_coeff[0..5], read in the order one-link, Naik,
         three-staple, five-staple, seven-staple, Lepage

   Terminates the run if phases_in != 1 or if an allocation fails. */
void load_asqtad_links(int both, ferm_links_t *fn, ks_action_paths *ap) {

  su3_matrix **t_fl = &fn->fat;
  su3_matrix **t_ll = &fn->lng;
  Real *act_path_coeff = ap->act_path_coeff;

  QDP_ColorMatrix *fl[4];
  QDP_ColorMatrix *ll[4];
  QDP_ColorMatrix *gf[4];
  int dir;
  /* remaptime accumulates only the time spent outside
     create_fn_links_qdp (the clock is paused around that call). */
  double remaptime = -dclock();
  char myname[] = "load_asqtad_links";
  
  asqtad_path_coeff c;

  if( phases_in != 1){
    node0_printf("%s: BOTCH: needs phases in\n",myname);
    terminate(1);
  }

  /* Create the QDP temporary for the gauge field only.  fl[] and ll[]
     are created inside create_fn_links_qdp, which overwrites whatever
     pointers are passed in; creating them here as well leaked one full
     set of fat and long link fields on every call. */
  FORALLUPDIR(dir){
    gf[dir] = QDP_create_M();
  }

  /* Map gauge links to QDP */
  set4_M_from_site(gf, F_OFFSET(link), EVENANDODD);

  /* Load Asqtad path coefficients from table */
  c.one_link     = act_path_coeff[0]; 
  c.naik         = act_path_coeff[1];
  c.three_staple = act_path_coeff[2];
  c.five_staple  = act_path_coeff[3];
  c.seven_staple = act_path_coeff[4];
  c.lepage       = act_path_coeff[5];

  /* Compute fat and long links as QDP fields (allocates fl[] and ll[]) */
  remaptime += dclock();
  create_fn_links_qdp(fl, ll, gf, &c);
  remaptime -= dclock();

  /* The gauge-field copy is no longer needed */
  FORALLUPDIR(dir){
    QDP_destroy_M(gf[dir]);
  }

  /* Allocate space for t_fatlink if NULL */
  if(*t_fl == NULL){
    *t_fl = (su3_matrix *)special_alloc(sites_on_node*4*sizeof(su3_matrix));
    if(*t_fl==NULL){
      printf("%s(%d): no room for t_fatlink\n",myname,this_node);
      terminate(1);
    }
  }
  
  /* Allocate space for t_longlink if NULL and we are doing both fat
     and long */
  if(*t_ll == NULL && both){
    *t_ll = (su3_matrix *)special_alloc(sites_on_node*4*sizeof(su3_matrix));
    if(*t_ll==NULL){
      printf("%s(%d): no room for t_longlink\n",myname,this_node);
      terminate(1);
    }
  }

  /* Map QDP fields to MILC order */
  set4_field_from_M(*t_fl, fl, EVENANDODD);
  if(both)set4_field_from_M(*t_ll, ll, EVENANDODD);

  /* Release the QDP link fields now that they have been copied out */
  FORALLUPDIR(dir){
    QDP_destroy_M(fl[dir]);
    QDP_destroy_M(ll[dir]);
  }
  
  fn->valid = 1;

  remaptime += dclock();
#ifdef LLTIME
#ifdef REMAP
  node0_printf("LLREMAP:  time = %e\n",remaptime);
#endif
#endif
}
예제 #17
0
/* Gauge-action / gauge-force benchmark driver.

   Builds random gauge links, initialises QOP for the current lattice,
   times bench_action once, then times bench_force for every block size
   in bsa[0..bsn-1], remembering the block size with the highest
   reported rate.  Finally the best block size is re-run with QDP
   profiling switched on.  Results are printed with printf0. */
void
start(void)
{
  double mf, best_mf;
  QLA_Real plaq;
  QDP_ColorMatrix **u;
  int i, bs, bsi, best_bs;

  u = (QDP_ColorMatrix **) malloc(ndim*sizeof(QDP_ColorMatrix *));
  for(i=0; i<ndim; i++) u[i] = QDP_create_M();
  get_random_links(u, ndim, 0.3);

  plaq = get_plaq(u);
  if(QDP_this_node==0) printf("plaquette = %g\n", plaq);
  
  QOP_layout_t qoplayout = QOP_LAYOUT_ZERO;
  qoplayout.latdim = ndim;
  qoplayout.latsize = (int *) malloc(ndim*sizeof(int));
  for(i=0; i<ndim; i++) {
    qoplayout.latsize[i] = lattice_size[i];
  }
  qoplayout.machdim = -1;

  if(QDP_this_node==0) { printf("begin init\n"); fflush(stdout); }
  QOP_init(&qoplayout);

  gauge = QOP_create_G_from_qdp(u);
  /* NOTE(review): u, qoplayout.latsize and the file-level gauge object are
     not released here; whether QOP keeps references to them is not
     visible from this function -- confirm before freeing. */

  QOP_Force *force;

  /* Zero matrices used as the starting value for every force object */
  QDP_ColorMatrix *cm[4];
  for(i=0; i<4; i++) {
    cm[i] = QDP_create_M();
    QDP_M_eq_zero(cm[i], QDP_all);
  }

  QOP_gauge_coeffs_t gcoeffs = QOP_GAUGE_COEFFS_ZERO;
  gcoeffs.plaquette  = 0.2;
  gcoeffs.rectangle  = 0.2;
  gcoeffs.parallelogram   = 0.2;
  gcoeffs.adjoint_plaquette = 0.2;

  force = QOP_create_F_from_qdp(cm);
  mf = bench_action(&gcoeffs, force);
  QOP_destroy_F(force);
  printf0("action: sec%7.4f mflops = %g\n", secs, mf);

  if(QDP_this_node==0) { printf("begin force\n"); fflush(stdout); }

  /* Scan the block sizes and keep the fastest one */
  best_mf = 0;
  best_bs = bsa[0];
  for(bsi=0; bsi<bsn; bsi++) {
    bs = bsa[bsi];
    QDP_set_block_size(bs);
    force = QOP_create_F_from_qdp(cm);
    mf = bench_force(&gcoeffs, force);
    QOP_destroy_F(force);
    printf0("GF: bs%5i sec%7.4f mflops = %g\n", bs, secs, mf);
    if(mf>best_mf) {
      best_mf = mf;
      best_bs = bs;
    }
  }

  /* Profiled run at the best block size */
  QDP_set_block_size(best_bs);
  QDP_profcontrol(1);
  force = QOP_create_F_from_qdp(cm);
  mf = bench_force(&gcoeffs, force);
  QDP_profcontrol(0);
  /* Release the force object like the earlier runs do; it used to leak. */
  QOP_destroy_F(force);
  printf0("prof: GF: bs%5i sec%7.4f mflops = %g\n", best_bs, secs, mf);

  printf0("best: GF: bs%5i mflops = %g\n", best_bs, best_mf);

  /* cm[] is only an input to QOP_create_F_from_qdp (it is reused after
     each QOP_destroy_F above), so it can be released now. */
  for(i=0; i<4; i++) QDP_destroy_M(cm[i]);

  if(QDP_this_node==0) { printf("begin unload links\n"); fflush(stdout); }
  //QOP_asqtad_invert_unload_links();
  if(QDP_this_node==0) { printf("begin finalize\n"); fflush(stdout); }
  QOP_finalize();
}
예제 #18
0
/* Wilson inverter benchmark driver.

   Builds random gauge links and a Gaussian source, initialises QOP, then
   scans the option tables sta[] ("st"), nsa[] ("ns"), nma[] ("nm") and the
   block sizes bsa[], timing bench_inv for each accepted combination and
   remembering the one with the highest reported rate.  The best
   combination is then re-run with QDP profiling switched on.  Results
   are printed with printf0. */
void
start(void)
{
  double mf, best_mf;
  QLA_Real plaq;
  QDP_ColorMatrix **u;
  QDP_DiracFermion *out, *in;
  int i, st, ns, nm, bs, sti, nsi, nmi, bsi,
    best_st, best_ns, best_nm, best_bs;

  u = (QDP_ColorMatrix **) malloc(ndim*sizeof(QDP_ColorMatrix *));
  for(i=0; i<ndim; i++) u[i] = QDP_create_M();
  get_random_links(u, ndim, 0.2);

  plaq = get_plaq(u);
  if(QDP_this_node==0) printf("plaquette = %g\n", plaq);

  out = QDP_create_D();
  in = QDP_create_D();
  QDP_D_eq_gaussian_S(in, rs, QDP_all);

  QOP_layout_t qoplayout = QOP_LAYOUT_ZERO;
  qoplayout.latdim = ndim;
  qoplayout.latsize = (int *) malloc(ndim*sizeof(int));
  for(i=0; i<ndim; i++) {
    qoplayout.latsize[i] = lattice_size[i];
  }
  qoplayout.machdim = -1;

  QOP_info_t info = QOP_INFO_ZERO;
  QOP_invert_arg_t inv_arg = QOP_INVERT_ARG_DEFAULT;
  QOP_resid_arg_t res_arg = QOP_RESID_ARG_DEFAULT;
  res_arg.rsqmin = rsqmin;
  inv_arg.max_iter = 600;
  inv_arg.restart = 200;
  inv_arg.evenodd = QOP_EVEN;

  if(QDP_this_node==0) { printf("begin init\n"); fflush(stdout); }
  QOP_init(&qoplayout);
  if(QDP_this_node==0) { printf("begin load links\n"); fflush(stdout); }
  //flw = QOP_wilson_create_L_from_qdp(u, NULL);
  if(QDP_this_node==0) { printf("begin invert\n"); fflush(stdout); }

  /* Optional solver selection */
  if(cgtype>=0) {
    QOP_opt_t optcg;
    optcg.tag = "cg";
    optcg.value = cgtype;
    QOP_wilson_invert_set_opts(&optcg, 1);
  }

  best_mf = 0;
  best_st = sta[0];
  best_ns = nsa[0];
  best_nm = nma[0];
  best_bs = bsa[0];
  QOP_opt_t optst;
  optst.tag = "st";
  QOP_opt_t optns;
  optns.tag = "ns";
  QOP_opt_t optnm;
  optnm.tag = "nm";
  for(sti=0; sti<stn; sti++) {
    /* when a style is requested, only that table entry is tried */
    if((style>=0)&&(sti!=style)) continue;
    st = sta[sti];
    optst.value = st;
    if(QOP_wilson_invert_set_opts(&optst, 1)==QOP_FAIL) continue;
    for(nsi=0; nsi<nsn; nsi++) {
      ns = nsa[nsi];
      optns.value = ns;
      if(QOP_wilson_invert_set_opts(&optns, 1)==QOP_FAIL) continue;
      for(nmi=0; nmi<nmn; nmi++) {
	nm = nma[nmi];
	if(nm==0) nm = ns;  /* a table value of 0 means "same as ns" */
	optnm.value = nm;
	if(QOP_wilson_invert_set_opts(&optnm, 1)==QOP_FAIL) continue;
	for(bsi=0; bsi<bsn; bsi++) {
	  bs = bsa[bsi];
	  QDP_set_block_size(bs);
	  /* links are rebuilt for every timing and released afterwards */
	  flw = QOP_wilson_create_L_from_qdp(u, NULL);
	  mf = bench_inv(&info, &inv_arg, &res_arg, out, in);
	  QOP_wilson_destroy_L(flw);
	  printf0("CONGRAD: st%2i ns%2i nm%2i bs%5i iter%5i sec%7.4f mflops = %g\n", st,
		  ns, nm, bs, res_arg.final_iter, info.final_sec, mf);
	  if(mf>best_mf) {
	    best_mf = mf;
	    best_st = st;
	    best_ns = ns;
	    best_nm = nm;
	    best_bs = bs;
	  }
	}
      }
    }
  }
  flw = QOP_wilson_create_L_from_qdp(u, NULL);

  /* Profiled run with the best option set */
  optst.value = best_st;
  optns.value = best_ns;
  optnm.value = best_nm;
  QOP_wilson_invert_set_opts(&optst, 1);
  QOP_wilson_invert_set_opts(&optns, 1);
  QOP_wilson_invert_set_opts(&optnm, 1);
  QDP_set_block_size(best_bs);
  QDP_profcontrol(1);
  mf = bench_inv(&info, &inv_arg, &res_arg, out, in);
  QDP_profcontrol(0);
  printf0("prof: CONGRAD: st%2i ns%2i nm%2i bs%5i iter%5i sec%7.4f mflops = %g\n",
          best_st, best_ns, best_nm, best_bs,
          res_arg.final_iter, info.final_sec, mf);

  printf0("best: CONGRAD: st%2i ns%2i nm%2i bs%5i mflops = %g\n",
          best_st, best_ns, best_nm, best_bs, best_mf);

  /* Release the links built for the profiled run (previously leaked,
     unlike the per-timing links in the scan above) and the fermion
     fields owned by this function. */
  QOP_wilson_destroy_L(flw);
  QDP_destroy_D(out);
  QDP_destroy_D(in);
  /* NOTE(review): u and qoplayout.latsize are not released here; whether
     QOP keeps references to them is not visible from this function. */

  if(QDP_this_node==0) { printf("begin unload links\n"); fflush(stdout); }
  //QOP_wilson_invert_unload_links();
  if(QDP_this_node==0) { printf("begin finalize\n"); fflush(stdout); }
  QOP_finalize();
}
예제 #19
0
/* HISQ fermion-force derivative, accumulated into deriv[0..3].

   info           receives final_sec, final_flop and status
   flh            HISQ links; U_links, V_links and W_unitlinks are read
   deriv          four matrices that the result is ADDED to (times 2)
   hisq_coeff     smearing coefficients, n_naiks, eps_naik[] and umethod
   residues, x    per-term weights and vectors, indexed consistently with
                  n_orders_naik
   n_orders_naik  number of terms for each Naik set

   Stages: for every Naik set the outer products are formed (QOP_get_mid)
   and pushed through the level-0 Asqtad derivative; the results are
   summed with weight 1 (set 0) or eps_naik[inaik]; the sum is passed
   through the reunitarization derivative (when umethod is RATIONAL) and
   then through the level-1 (fat7) derivative.  Calls exit(1) for any
   other unitarization method. */
void 
QOP_hisq_deriv_multi_fnmat2_qdp(QOP_info_t *info,  
				QOP_FermionLinksHisq *flh,
				QDP_ColorMatrix *deriv[],
				QOP_hisq_coeffs_t *hisq_coeff,
				REAL *residues,
				QDP_ColorVector *x[], 
				int *n_orders_naik)
{
#define NC QDP_get_nc(deriv[0])
  if(!QOP_asqtad.inited) QOP_asqtad_invert_init();

  double dtime = QDP_time();
  double totalflops = 0;
  int siteflops = 0;
  QOP_info_t tinfo;

  QDP_ColorMatrix *Ugf[4], *Vgf[4], *Wgf[4];
  for(int i=0; i<4; i++) {
    Ugf[i] = flh->U_links[i];
    Vgf[i] = flh->V_links[i];
    Wgf[i] = flh->W_unitlinks[i];
  }

  /* Work fields.  (Two further sets, tmat and force_final[], used to be
     allocated and destroyed here without ever being read or written;
     they have been dropped.) */
  QDP_ColorMatrix *force_accum_0[4];
  QDP_ColorMatrix *force_accum_0_naik[4];
  QDP_ColorMatrix *force_accum_1[4];
  QDP_ColorMatrix *force_accum_1u[4];
  QDP_ColorMatrix *force_accum_2[4];
  for(int i=0; i<4; i++) {
     force_accum_0[i] = QDP_create_M();
     force_accum_0_naik[i] = QDP_create_M();
     force_accum_1[i] = QDP_create_M();
     force_accum_1u[i] = QDP_create_M();
     force_accum_2[i] = QDP_create_M();
     QDP_M_eq_zero(force_accum_2[i], QDP_all);
  }

  int n_naiks = hisq_coeff->n_naiks;
  int nterms = 0;
  for(int inaik = 0; inaik < n_naiks; inaik++)
    nterms += n_orders_naik[inaik];

  // loop on different naik masses
  int n_naik_shift = 0;
  for(int inaik=0; inaik<n_naiks; inaik++) {
    // the first set runs over ALL terms, later sets only over their own
    int n_orders_naik_current;
    if( inaik==0 ) {
      n_orders_naik_current = nterms;
    } else {
      n_orders_naik_current = n_orders_naik[inaik];
    }

    QOP_get_mid(&tinfo, force_accum_0, QDP_neighbor, 4, residues+n_naik_shift,
		1, x+n_naik_shift, n_orders_naik_current);
    totalflops += tinfo.final_flop;
    QOP_get_mid(&tinfo, force_accum_0_naik, QOP_common.neighbor3, 4,
		residues+n_naik_shift, 1, x+n_naik_shift,
		n_orders_naik_current);
    totalflops += tinfo.final_flop;
    // compensate for -1 on odd sites here instead of at end
    for(int dir=0; dir<4; dir++) {
      QDP_M_eqm_M(force_accum_0[dir], force_accum_0[dir], QDP_odd);
      QDP_M_eqm_M(force_accum_0_naik[dir], force_accum_0_naik[dir], QDP_odd);
    }

    // smearing level 0
    for(int i=0; i<4; i++) QDP_M_eq_zero(force_accum_1[i], QDP_all);
    if(inaik==0) {
      QOP_asqtad_coeffs_t acoef;
      acoef.one_link = hisq_coeff->asqtad_one_link;
      acoef.three_staple = hisq_coeff->asqtad_three_staple;
      acoef.five_staple = hisq_coeff->asqtad_five_staple;
      acoef.seven_staple = hisq_coeff->asqtad_seven_staple;
      acoef.lepage = hisq_coeff->asqtad_lepage;
      acoef.naik = hisq_coeff->asqtad_naik;
      QOP_asqtad_deriv(&tinfo, Wgf, force_accum_1, &acoef,
		       force_accum_0, force_accum_0_naik);
      //QOP_printf0("HISQ smear0 flops = %g\n", tinfo.final_flop);
      totalflops += tinfo.final_flop;
    } else {
      // later sets only carry the one-link and Naik differences
      QOP_asqtad_coeffs_t acoef;
      acoef.one_link = hisq_coeff->difference_one_link;
      acoef.three_staple = 0;
      acoef.five_staple = 0;
      acoef.seven_staple = 0;
      acoef.lepage = 0;
      acoef.naik = hisq_coeff->difference_naik;
      QOP_asqtad_deriv(&tinfo, Wgf, force_accum_1, &acoef,
		       force_accum_0, force_accum_0_naik);
      totalflops += tinfo.final_flop;
    }

    QLA_Real coeff_mult;
    if( inaik==0 ) {
      coeff_mult = 1.0;
    } else {
      coeff_mult = hisq_coeff->eps_naik[inaik];
    }
    for(int dir=0; dir<4; dir++) {
      QDP_M_peq_r_times_M(force_accum_2[dir], &coeff_mult,
			  force_accum_1[dir], QDP_all);
    }
    siteflops += 4*36;

    n_naik_shift += n_orders_naik[inaik];
  }

  // smearing level 1
  QOP_asqtad_coeffs_t acoef;
  acoef.one_link = hisq_coeff->fat7_one_link;
  acoef.three_staple = hisq_coeff->fat7_three_staple;
  acoef.five_staple = hisq_coeff->fat7_five_staple;
  acoef.seven_staple = hisq_coeff->fat7_seven_staple;
  acoef.lepage = 0;
  acoef.naik = 0;
  if(QOP_hisq_links.use_fat7_lepage) {
    acoef.lepage = hisq_coeff->fat7_lepage;
  }

  QOP_hisq_unitarize_method_t umethod = hisq_coeff->umethod;
  if ( umethod==QOP_UNITARIZE_NONE ){

    for(int dir=0; dir<4; dir++)
      QDP_M_eq_zero(force_accum_1[dir], QDP_all);
    QOP_asqtad_deriv(&tinfo, Ugf, force_accum_1, &acoef,
		     force_accum_2, NULL);
    totalflops += tinfo.final_flop;

  } else if ( umethod==QOP_UNITARIZE_RATIONAL ) {

    for(int mu=0; mu<4; mu++) QDP_M_eq_Ma(force_accum_1u[mu], force_accum_2[mu], QDP_all);
    // reunitarization
#if QOP_Colors == 3
    QOP_hisq_force_multi_reunit(&tinfo, Vgf, force_accum_2, force_accum_1u);
    totalflops += tinfo.final_flop;
#else
    for(int mu=0; mu<4; mu++) {
      QOP_projectU_deriv_qdp(&tinfo, force_accum_2[mu], Wgf[mu], Vgf[mu], force_accum_1u[mu]);
      // count every direction; adding once after the loop only picked
      // up the flops of the last call
      totalflops += tinfo.final_flop;
    }
#endif
    //QOP_printf0("reunit flops = %g\n", tinfo.final_flop);
    for(int mu=0; mu<4; mu++) QDP_M_eq_Ma(force_accum_1u[mu], force_accum_2[mu], QDP_all);

    for(int dir=0; dir<4; dir++) QDP_M_eq_zero(force_accum_1[dir], QDP_all);
    QOP_asqtad_deriv(&tinfo, Ugf, force_accum_1, &acoef,
		     force_accum_1u, NULL);
    //QOP_printf0("HISQ smear1 flops = %g\n", tinfo.final_flop);
    totalflops += tinfo.final_flop;

  } else {
    QOP_printf0("Unknown or unsupported unitarization method\n");
    exit(1);
  }

  // take into account even/odd parity (it is NOT done in "smearing" routine)
  // eps multiplication done outside QOP 
  // extra factor of 2
  for(int dir=0; dir<4; dir++) {
    QLA_Real treal = 2;
    //QDP_M_peq_r_times_M(deriv[dir], &treal, force_accum_1[dir], QDP_even);
    //QDP_M_meq_r_times_M(deriv[dir], &treal, force_accum_1[dir], QDP_odd);
    QDP_M_peq_r_times_M(deriv[dir], &treal, force_accum_1[dir], QDP_all);
  }
  siteflops += 4*36;

  for(int i=0; i<4; i++) {
     QDP_destroy_M( force_accum_0[i] );
     QDP_destroy_M( force_accum_0_naik[i] );
     QDP_destroy_M( force_accum_1[i] );
     QDP_destroy_M( force_accum_1u[i] );
     QDP_destroy_M( force_accum_2[i] );
  }

  totalflops += ((double)siteflops)*QDP_sites_on_node;
  info->final_sec = QDP_time() - dtime;
  info->final_flop = totalflops;
  info->status = QOP_SUCCESS;
#undef NC
}
예제 #20
0
/* Smearing level i: push the force accumulated at the previous level
   through the paths of one smearing level.

   info                       only final_flop is written
   gf                         gauge links used for the transports
   residues, x, nterms        not referenced by this routine
   force_accum                output; zeroed here, then accumulated
   force_accum_old            previous-level force for one-link displacements
   force_accum_naik_old       previous-level force for Naik (three-link)
                              displacements; may be NULL only if the path
                              table holds no path with netbackdir >= 8
   internal_num_q_paths       number of entries in the path table
   internal_q_paths_sorted    the path table
   internal_netbackdir_table  net backward direction of each path

   Aborts with a nonzero exit status if a Naik path is met while
   force_accum_naik_old is NULL. */
static void 
QOP_hisq_force_multi_smearing_fnmat(QOP_info_t *info, 
				    QDP_ColorMatrix * gf[4],
				    REAL *residues,
				    QDP_ColorVector *x[], 
				    int nterms, 
				    QDP_ColorMatrix *force_accum[4],
				    QDP_ColorMatrix *force_accum_old[4],
				    QDP_ColorMatrix *force_accum_naik_old[4],
				    int internal_num_q_paths,
				    Q_path *internal_q_paths_sorted,
				    int *internal_netbackdir_table)
{
  int i,j,k,lastdir=-99,ipath,ilink;
  int length,dir,odir;
  REAL coeff;

  QDP_ColorMatrix *tmat;
  QDP_ColorMatrix *oprod_along_path[MAX_PATH_LENGTH+1];
  QDP_ColorMatrix *mats_along_path[MAX_PATH_LENGTH+1];
  QDP_ColorMatrix *mat_tmp0,*mat_tmp1, *stmp[8];

  int netbackdir;
  size_t nflops = 0;  // per-site flop count

// table of net path displacements (backwards from usual convention)

  Q_path *this_path;	// pointer to current path

  /* Allocate fields */
  for(i=0;i<=MAX_PATH_LENGTH;i++){
    oprod_along_path[i] = QDP_create_M();
  }
  for(i=1;i<=MAX_PATH_LENGTH;i++){ 
    // 0 element is never used (it's unit matrix)
    mats_along_path[i] = QDP_create_M();
  }

  mat_tmp0   = QDP_create_M();
  mat_tmp1   = QDP_create_M();
  for(i=0; i<8; i++) stmp[i] = QDP_create_M();
  tmat       = QDP_create_M();
  // (two colour-vector temporaries used to be allocated here but were
  //  never used; they have been removed)
 
  // clear force accumulators
  for(dir=XUP;dir<=TUP;dir++)
    QDP_M_eq_zero(force_accum[dir], QDP_all);

  // loop over paths, and loop over links in path 
  for( ipath=0; ipath<internal_num_q_paths; ipath++ ){
    this_path = &(internal_q_paths_sorted[ipath]); 
    if(this_path->forwback== -1)continue;	// skip backwards dslash 
    length = this_path->length;
    netbackdir = internal_netbackdir_table[ipath];

    // move f(i-1) force from current site in positive direction,
    //  this corresponds to outer product |X><Y| calculated at the endpoint of the path 
    if( netbackdir<8) { // Not a Naik path
      link_gather_connection_qdp(oprod_along_path[0] , 
				 force_accum_old[OPP_DIR(netbackdir)],
				 tmat, netbackdir );
    }
    else { // Naik path
      if( NULL==force_accum_naik_old ) {
        QOP_printf0( "hisq_force_multi_smearing_fnmat:  mismatch:\n" );
        QOP_printf0( "force_accum_naik_old is NULL, but path table contains Naik paths(!)\n" );
        // fatal inconsistency: report failure to the parent process
        // (this used to exit with status 0, i.e. "success")
        exit(1);
      }
      // CONVERSION FROM 3-LINK DIRECTION TO 1-LINK DIRECTION
      link_gather_connection_qdp(oprod_along_path[0] , 
				 force_accum_naik_old[OPP_DIR(netbackdir-8)],
				 tmat, netbackdir );
    }

    // figure out how much of the outer products along the path must be
    // recomputed. j is last one needing recomputation. k is first one.
    j=length-1; // default is recompute all
    if( GOES_BACKWARDS(this_path->dir[0]) ) k=1; else k=0;

    for(ilink=j;ilink>=k;ilink--){
      link_transport_connection_qdp( oprod_along_path[length-ilink], 
				     oprod_along_path[length-ilink-1], gf,
				     mat_tmp0, stmp, this_path->dir[ilink]  );
      nflops += 198;
    }

    // maintain an array of transports "to this point" along the path.
    //	Don't recompute beginning parts of path if same as last path 
    ilink=0; // first link where new transport is needed
    // Sometimes we don't need the matrix for the last link
    if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length;

    for( ; ilink<k; ilink++ ){
      if( ilink==0 ){
        dir = this_path->dir[0];
	if( GOES_FORWARDS(dir) ){
	  QDP_M_eq_sM(tmat, gf[dir], QDP_neighbor[dir],
		      QDP_backward, QDP_all);
	  QDP_M_eq_Ma(mats_along_path[1], tmat, QDP_all);
	  QDP_discard_M(tmat);
	}
	else{
	  QDP_M_eq_M(mats_along_path[1], gf[OPP_DIR(dir)], QDP_all);
	}
      }
      else { // ilink != 0
        dir = OPP_DIR(this_path->dir[ilink]);

	link_transport_connection_qdp( mats_along_path[ilink+1], 
				       mats_along_path[ilink], gf,
				       mat_tmp0, stmp, dir );
	nflops += 198;
      }
    } // end loop over links

    // A path has (length+1) points, counting the ends.  At first
    //	 point, no "down" direction links have their momenta "at this
    //	 point". At last, no "up" ... 
    if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length;
    for( ilink=0; ilink<=k; ilink++ ){
      if(ilink<length)dir = this_path->dir[ilink];
      else dir=NODIR;
      coeff = this_path->coeff;
      if( (ilink%2)==1 )coeff = -coeff;  // sign alternates along the path
      // add in contribution to the force 
      if( ilink<length && GOES_FORWARDS(dir) ){
	link_gather_connection_qdp(mat_tmp1, 
		       oprod_along_path[length-ilink-1], tmat, dir );
        if(ilink==0) 
	  {
	    QDP_M_eq_M(mat_tmp0,mat_tmp1,QDP_all);
	  }
        else
	  {
	    QDP_M_eq_M_times_Ma(mat_tmp0, mats_along_path[ilink], 
				mat_tmp1, QDP_all);
	    nflops += 198;
	    QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all);
	  }
	QDP_M_peq_r_times_M(force_accum[dir],&coeff,mat_tmp1,QDP_all);
	nflops += 36;
      }
      // lastdir is the direction of the previous link on THIS path; the
      // ilink>0 guard keeps a value left over from another path unused
      if( ilink>0 && GOES_BACKWARDS(lastdir) ){
	odir = OPP_DIR(lastdir);
        if( ilink==1 ){
	  QDP_M_eq_M(mat_tmp0,oprod_along_path[length-ilink],QDP_all);
	  QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all);
	}
        else{
	  link_gather_connection_qdp(mat_tmp1, mats_along_path[ilink-1], 
				     tmat, odir );
	  QDP_M_eq_M_times_Ma(mat_tmp0, oprod_along_path[length-ilink], 
			      mat_tmp1, QDP_all);
	  nflops += 198;
	  QDP_M_eq_Ma(mat_tmp1, mat_tmp0, QDP_all);
        }
	QDP_M_peq_r_times_M(force_accum[odir],&coeff,mat_tmp1,QDP_all);
	nflops += 36;
      }
      lastdir = dir;
    } // end loop over links in path //
  } // end loop over paths //

  QDP_destroy_M( mat_tmp0 );
  QDP_destroy_M( mat_tmp1 );
  QDP_destroy_M( tmat );
  for(i=0; i<8; i++) QDP_destroy_M(stmp[i]);
  for(i=0;i<=MAX_PATH_LENGTH;i++){
    QDP_destroy_M( oprod_along_path[i] );
  }
  for(i=1;i<=MAX_PATH_LENGTH;i++){
    QDP_destroy_M( mats_along_path[i] );
  }

  info->final_flop = ((double)nflops)*QDP_sites_on_node;

  return;
}//hisq_force_multi_smearing_fnmat
예제 #21
0
/* Conjugate-gradient solve of M_adjoint*M*psi = chi with QDP fields.

   niter          iterations between restarts; at most 3*niter iterations
                  are attempted before giving up
   rsqmin         stop when |r|^2 <= rsqmin * |chi|^2
   final_rsq_ptr  receives the last residual norm squared

   The gauge links, psi and chi are loaded from the site structure; psi is
   written back to the site structure at every restart and on convergence
   inside the main loop.  Returns the number of iterations performed.

   With LU defined the operator is 1 - kappa^2 * D D restricted to even
   sites; otherwise it is 1 - kappa * D on all sites. */
int
congrad_w(int niter, Real rsqmin, Real *final_rsq_ptr) 
{
  int i;
  int iteration;	/* counter for iterations */
  double source_norm;
  double rsqstop;
  QLA_Real a, b;
  double rsq,oldrsq,pkp;	/* Sugar's a,b,resid**2,previous resid*2 */
				/* pkp = cg_p.K.cg_p */
  QLA_Real mkappa;
  QLA_Real sum;
#ifdef CGTIME
  double dtime;
#endif
#ifdef LU
  mkappa = -kappa*kappa;
#else
  mkappa = -kappa;
#endif

  setup_cg();

  for(i=0; i<4; i++) {
    set_M_from_site(gaugelink[i], F_OFFSET(link[i]),EVENANDODD);
  }
  set_D_from_site(psi, F_OFFSET(psi),EVENANDODD);
  set_D_from_site(chi, F_OFFSET(chi),EVENANDODD);

#ifdef PRESHIFT_LINKS
  {
    /* gaugelink[4..7] hold the adjoints of the backward-shifted links */
    QDP_ColorMatrix *tcm;
    tcm = QDP_create_M();
    for(i=0; i<4; i++) {
      QDP_M_eq_sM(tcm, gaugelink[i], QDP_neighbor[i], QDP_backward, QDP_all);
      QDP_M_eq_Ma(gaugelink[i+4], tcm, QDP_all);
    }
    QDP_destroy_M(tcm);
  }
#endif

#ifdef CGTIME
  dtime = -dclock();
#endif

  iteration=0;
 start:
  /* mp <-  M_adjoint*M*psi
     r,p <- chi - mp
     rsq = |r|^2
     source_norm = |chi|^2
  */
  rsq = source_norm = 0.0;

#ifdef LU

  QDP_D_eq_D(cgp, psi, QDP_even);
  dslash_special_qdp(tt1, cgp, 1, QDP_odd, temp1);
  dslash_special_qdp(ttt, tt1, 1, QDP_even, temp2);
  QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_even);

  dslash_special_qdp(tt2, ttt, -1, QDP_odd, temp3);
  dslash_special_qdp(mp, tt2, -1, QDP_even, temp4);
  QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_even);
  QDP_D_eq_D_minus_D(cgr, chi, mp, QDP_even);
  QDP_D_eq_D(cgp, cgr, QDP_even);

  QDP_r_eq_norm2_D(&sum, chi, QDP_even);
  source_norm = sum;
  QDP_r_eq_norm2_D(&sum, cgr, QDP_even);
  rsq = sum;

#else

  /* The non-LU operator is applied on all sites, so the starting vector
     must be copied on all sites as well.  Copying only the even sites
     left the odd half of cgp holding stale data (the previous search
     direction after a restart) when the full-lattice dslash read it. */
  QDP_D_eq_D(cgp, psi, QDP_all);
  dslash_special_qdp(ttt, cgp, 1, QDP_all, temp1);
  QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_all);

  dslash_special_qdp(mp, ttt, -1, QDP_all, temp1);
  QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_all);

  QDP_D_eq_D_minus_D(cgr, chi, mp, QDP_all);
  QDP_D_eq_D(cgp, cgr, QDP_all);

  QDP_r_eq_norm2_D(&sum, chi, QDP_all);
  source_norm = sum;
  QDP_r_eq_norm2_D(&sum, cgr, QDP_all);
  rsq = sum;

#endif

  iteration++ ;	/* iteration counts number of multiplications
		   by M_adjoint*M */
  total_iters++;
  /**if(this_node==0)printf("congrad2: source_norm = %e\n",source_norm);
     if(this_node==0)printf("congrad2: iter %d, rsq %e, pkp %e, a %e\n",
     iteration,(double)rsq,(double)pkp,(double)a );**/
  rsqstop = rsqmin * source_norm;
  if( rsq <= rsqstop ){
    /* psi has not been modified since it was last loaded from / stored
       to the site structure, so nothing needs to be written back */
    *final_rsq_ptr= (Real)rsq;
    return (iteration);
  }

  /* main loop - do until convergence or time to restart */
  /* 
     oldrsq <- rsq
     mp <- M_adjoint*M*p
     pkp <- p.M_adjoint*M.p
     a <- rsq/pkp
     psi <- psi + a*p
     r <- r - a*mp
     rsq <- |r|^2
     b <- rsq/oldrsq
     p <- r + b*p
  */
  do {
    oldrsq = rsq;
#ifdef LU
    dslash_special_qdp(tt1, cgp, 1, QDP_odd, temp1);
    dslash_special_qdp(ttt, tt1, 1, QDP_even, temp2);
    QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_even);

    dslash_special_qdp(tt2, ttt, -1, QDP_odd, temp3);
    dslash_special_qdp(mp, tt2, -1, QDP_even, temp4);
    QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_even);

    QDP_r_eq_re_D_dot_D(&sum, cgp, mp, QDP_even);
    pkp = sum;
#else
    dslash_special_qdp(ttt, cgp, 1, QDP_all, temp1);
    QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_all);

    dslash_special_qdp(mp, ttt, -1, QDP_all, temp1);
    QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_all);

    QDP_r_eq_re_D_dot_D(&sum, cgp, mp, QDP_all);
    pkp = sum;
#endif
    iteration++;
    total_iters++;

    /* MYSUBSET is defined outside this function; presumably QDP_even
       with LU and QDP_all otherwise -- TODO confirm */
    a = rsq / pkp;
    QDP_D_peq_r_times_D(psi, &a, cgp, MYSUBSET);
    QDP_D_meq_r_times_D(cgr, &a, mp, MYSUBSET);
    QDP_r_eq_norm2_D(&sum, cgr, MYSUBSET);
    rsq = sum;

    /**if(this_node==0)printf("congrad2: iter %d, rsq %e, pkp %e, a %e\n",
       iteration,(double)rsq,(double)pkp,(double)a );**/
    if( rsq <= rsqstop ){
      *final_rsq_ptr= (Real)rsq;
#ifdef CGTIME
      dtime += dclock();
      if(this_node==0)
	printf("CONGRAD2: time = %.2e size_r= %.2e iters= %d MF = %.1f\n",
	       dtime,rsq,iteration,
	       (double)6480*iteration*even_sites_on_node/(dtime*1e6));
      //(double)5616*iteration*even_sites_on_node/(dtime*1e6));
#endif
      set_site_from_D(F_OFFSET(psi), psi,EVENANDODD);
      return (iteration);
    }

    b = rsq / oldrsq;
    QDP_D_eq_r_times_D_plus_D(cgp, &b, cgp, cgr, MYSUBSET);

  } while( iteration%niter != 0);

  /* restart point: save the current solution, then recompute the true
     residual from scratch unless the iteration budget is exhausted */
  set_site_from_D(F_OFFSET(psi), psi,EVENANDODD);

  if( iteration < 3*niter ) goto start;
  *final_rsq_ptr= (Real)rsq;
  return(iteration);
}
예제 #22
0
// Accumulate into deriv[] the contributions obtained by propagating the
// chain[] fields back through a list of staples built from the in[] links.
//
//   info         receives final_sec, final_flop and status
//   nout         number of outputs; one chain[] field and one staple list each
//   nin          number of input links (in[] and deriv[] are indexed 0..nin-1)
//   deriv        accumulated into (+=); never cleared here
//   chain        per-output field, copied into tc before its staples are processed
//   nstaples     nstaples[io] is the number of staples of output io
//   coef         coef[io][s] is the weight of staple s of output io
//
// Per-staple tables (indexed [io][s]):
// topdir = 1..nd
// sidedir = -nd..nd
//   (0 selects the plain-link term; >0 and <0 select the two staple branches below)
// toplinknum,sidelinknum = 0..nin-1
//
// NOTE(review): getU, getC and shiftb are defined outside this function;
// presumably macros that cache shifted fields in ftmps, bt2/bt3 and
// ctmps/ctn -- confirm before renaming any of these locals.
void
QOP_staples_deriv(QOP_info_t *info, int nout, int nin,
		  QDP_ColorMatrix *deriv[], QDP_ColorMatrix *chain[],
		  QDP_ColorMatrix *in[],
		  int nstaples[], int *topdir[], int *sidedir[],
		  int *toplinknum[], int *sidelinknum[], QLA_Real *coef[])
{
#define NC QDP_get_nc(in[0])
  double dtime = QOP_time();
  double nflops = 0;  // per-site flop count, scaled by the site count at the end
  int nd = QDP_ndim();
  QDP_ColorMatrix *ftmps[nin][nd], *t1, *t2, *t3, *t4, *tc, *bt2[nd], *bt3[nd], *ctmps[nd];
  int ctn[nd];
  // all cache slots start empty; only non-NULL ones are destroyed at the end
  for(int i=0; i<nin; i++)
    for(int j=0; j<nd; j++)
      ftmps[i][j] = NULL;
  for(int i=0; i<nd; i++) bt2[i] = bt3[i] = ctmps[i] = NULL;
  t1 = QDP_create_M();
  t2 = QDP_create_M();
  t3 = QDP_create_M();
  t4 = QDP_create_M();
  tc = QDP_create_M();

  // process in reverse in case calculated staples used as input for others
  for(int io=nout-1; io>=0; io--) {
    // tc changes for every output, so reset the per-direction ctmps/ctn state
    for(int i=0; i<nd; i++) {
      if(ctmps[i]) QDP_discard_M(ctmps[i]);
      ctn[i] = 0;
    }
    QDP_M_eq_M(tc, chain[io], QDP_all);
    for(int s=0; s<nstaples[io]; s++) {
      QLA_Real c = coef[io][s];
      int tn = toplinknum[io][s];
      int sdir = sidedir[io][s];
      //QOP_printf0("io: %i  s: %i  sdir: %i  tn: %i  c: %g\n", io, s, sdir, tn, c);
      if(sdir==0) {
	// plain link term: deriv[tn] += c * tc (multiply skipped when c==1)
	if(c==1) {
	  QDP_M_peq_M(deriv[tn], tc, QDP_all);
	  nflops += PEQM;
	} else {
	  QDP_M_peq_r_times_M(deriv[tn], &c, tc, QDP_all);
	  nflops += 2*PEQM;
	}
      } else if(sdir>0) {
	// positive side direction: three contributions, two of them
	// obtained through shiftb
	int nu = sdir-1;
	int mu = topdir[io][s]-1;
	int sn = sidelinknum[io][s];
	//QOP_printf0("  mu: %i  nu: %i  sn: %i\n", mu, nu, sn);
	QDP_ColorMatrix *Umunu = getU(tn, mu, nu);
	QDP_ColorMatrix *Unumu = getU(sn, nu, mu);
	QDP_M_eq_M_times_M(t1, in[sn], Umunu, QDP_all);
	QDP_M_eq_Ma_times_M(t2, tc, t1, QDP_all);
	QDP_ColorMatrix *tb2 = shiftb(t2, mu);
	// t1 is reused from here on: it now holds tc * Unumu
	QDP_M_eq_M_times_M(t1, tc, Unumu, QDP_all);
	QDP_M_eq_Ma_times_M(t3, in[sn], t1, QDP_all);
	QDP_ColorMatrix *tb3 = shiftb(t3, nu);
	if(c==1) {
	  QDP_M_peq_M_times_Ma(deriv[sn], t1, Umunu, QDP_all);
	  QDP_M_peq_M(deriv[sn], tb2, QDP_all);
	  QDP_M_peq_M(deriv[tn], tb3, QDP_all);
	  nflops += 4*EQMTM+PEQMTM+2*PEQM;
	} else {
	  QDP_M_eq_M_times_Ma(t4, t1, Umunu, QDP_all);
	  QDP_M_peq_r_times_M(deriv[sn], &c, t4, QDP_all);
	  QDP_M_peq_r_times_M(deriv[sn], &c, tb2, QDP_all);
	  QDP_M_peq_r_times_M(deriv[tn], &c, tb3, QDP_all);
	  nflops += 5*EQMTM+6*PEQM;
	}
	QDP_discard_M(tb2);
	QDP_discard_M(tb3);
      } else {
	// negative side direction: uses getC(nu) in place of tc
	int nu = -sdir-1;
	int mu = topdir[io][s]-1;
	int sn = sidelinknum[io][s];
	QDP_ColorMatrix *Cmunu = getC(nu);
	QDP_ColorMatrix *Unumu = getU(sn, nu, mu);
	QDP_M_eq_M_times_M(t1, in[sn], Cmunu, QDP_all);
	QDP_M_eq_Ma_times_M(t2, in[tn], t1, QDP_all);
	QDP_ColorMatrix *tb2 = shiftb(t2, mu);
	QDP_M_eq_M_times_M(t3, in[tn], Unumu, QDP_all);
	if(c==1) {
	  QDP_M_peq_M_times_Ma(deriv[tn], t1, Unumu, QDP_all);
	  QDP_M_peq_M_times_Ma(deriv[sn], t3, Cmunu, QDP_all);
	  QDP_M_peq_M(deriv[sn], tb2, QDP_all);
	  nflops += 3*EQMTM+2*PEQMTM+PEQM;
	} else {
	  QDP_M_eq_M_times_Ma(t4, t1, Unumu, QDP_all);
	  QDP_M_peq_r_times_M(deriv[tn], &c, t4, QDP_all);
	  QDP_M_eq_M_times_Ma(t4, t3, Cmunu, QDP_all);
	  QDP_M_peq_r_times_M(deriv[sn], &c, t4, QDP_all);
	  QDP_M_peq_r_times_M(deriv[sn], &c, tb2, QDP_all);
	  nflops += 5*EQMTM+6*PEQM;
	}
	QDP_discard_M(tb2);
      }
    }
  }

  // release whatever cache fields were created along the way
  for(int i=0; i<nin; i++)
    for(int j=0; j<nd; j++)
      if(ftmps[i][j]!=NULL) QDP_destroy_M(ftmps[i][j]);
  for(int i=0; i<nd; i++) {
    if(bt2[i]!=NULL) QDP_destroy_M(bt2[i]);
    if(bt3[i]!=NULL) QDP_destroy_M(bt3[i]);
    if(ctmps[i]!=NULL) QDP_destroy_M(ctmps[i]);
  }
  QDP_destroy_M(t1);
  QDP_destroy_M(t2);
  QDP_destroy_M(t3);
  QDP_destroy_M(t4);
  QDP_destroy_M(tc);
  info->final_sec = QOP_time() - dtime;
  info->final_flop = nflops*QDP_sites_on_node; 
  info->status = QOP_SUCCESS;
#undef NC
}
예제 #23
0
// Accumulate weighted sums of links and staples into out[].
//
//   info         receives final_sec, final_flop and status
//   nout         number of output fields
//   nin          number of input links (in[] is indexed 0..nin-1)
//   out          accumulated into (+=); never cleared here
//   nstaples     nstaples[io] is the number of terms of output io
//   coef         coef[io][s] is the weight of term s of output io
//
// Per-term tables (indexed [io][s]):
// topdir = 1..nd
// sidedir = -nd..nd
//   (0 adds the plain link in[toplinknum]; >0 and <0 select the two staple branches below)
// toplinknum,sidelinknum = 0..nin-1
//
// NOTE(review): getU and shiftb are defined outside this function;
// presumably macros that cache shifted fields in ftmps and bt2 --
// confirm before renaming any of these locals.
void
QOP_staples(QOP_info_t *info, int nout, int nin,
	    QDP_ColorMatrix *out[], QDP_ColorMatrix *in[],
	    int nstaples[], int *topdir[], int *sidedir[],
	    int *toplinknum[], int *sidelinknum[], QLA_Real *coef[])
{
#define NC QDP_get_nc(in[0])
  double dtime = QOP_time();
  double nflops = 0;  // per-site flop count, scaled by the site count at the end
  int nd = QDP_ndim();
  QDP_ColorMatrix *ftmps[nin][nd], *t1, *t2, *bt2[nd];
  // all cache slots start empty; only non-NULL ones are destroyed at the end
  for(int i=0; i<nin; i++)
    for(int j=0; j<nd; j++)
      ftmps[i][j] = NULL;
  for(int i=0; i<nd; i++) bt2[i] = NULL;
  t1 = QDP_create_M();
  t2 = QDP_create_M();

  for(int io=0; io<nout; io++) {
    //QOP_printf0("%i: ns: %i\n", io, nstaples[io]);
    for(int s=0; s<nstaples[io]; s++) {
      QLA_Real c = coef[io][s];
      int tn = toplinknum[io][s];
      int sdir = sidedir[io][s];
      //QOP_printf0(" %i:  sdir: %i  c: %g\n", s, sdir, c);
      if(sdir==0) {
	// plain link term: out[io] += c * in[tn] (multiply skipped when c==1)
	if(c==1) {
	  QDP_M_peq_M(out[io], in[tn], QDP_all);
	  nflops += PEQM;
	} else {
	  QDP_M_peq_r_times_M(out[io], &c, in[tn], QDP_all);
	  nflops += 2*PEQM;
	}
      } else if(sdir>0) {
	// positive side direction: out[io] += c * in[sn] * Umunu * adj(Unumu)
	int nu = sdir-1;
	int mu = topdir[io][s]-1;
	int sn = sidelinknum[io][s];
	QDP_ColorMatrix *Umunu = getU(tn, mu, nu);
	QDP_ColorMatrix *Unumu = getU(sn, nu, mu);
	QDP_M_eq_M_times_M(t1, in[sn], Umunu, QDP_all);
	if(c==1) {
	  QDP_M_peq_M_times_Ma(out[io], t1, Unumu, QDP_all);
	  nflops += EQMTM+PEQMTM;
	} else {
	  QDP_M_eq_M_times_Ma(t2, t1, Unumu, QDP_all);
	  QDP_M_peq_r_times_M(out[io], &c, t2, QDP_all);
	  nflops += 2*EQMTM+2*PEQM;
	}
      } else {
	// negative side direction: adj(in[sn]) * in[tn] * Unumu is formed
	// locally and then passed through shiftb(., nu) before being added
	int nu = -sdir-1;
	int mu = topdir[io][s]-1;
	int sn = sidelinknum[io][s];
	QDP_ColorMatrix *Unumu = getU(sn, nu, mu);
	QDP_M_eq_M_times_M(t1, in[tn], Unumu, QDP_all);
	QDP_M_eq_Ma_times_M(t2, in[sn], t1, QDP_all);
	QDP_ColorMatrix *tb = shiftb(t2, nu);
	if(c==1) {
	  QDP_M_peq_M(out[io], tb, QDP_all);
	  nflops += 2*EQMTM+PEQM;
	} else {
	  QDP_M_peq_r_times_M(out[io], &c, tb, QDP_all);
	  nflops += 2*EQMTM+2*PEQM;
	}
	QDP_discard_M(tb);
      }
    }
  }

  // release whatever cache fields were created along the way
  for(int i=0; i<nin; i++)
    for(int j=0; j<nd; j++)
      if(ftmps[i][j]!=NULL) QDP_destroy_M(ftmps[i][j]);
  for(int i=0; i<nd; i++) if(bt2[i]!=NULL) QDP_destroy_M(bt2[i]);
  QDP_destroy_M(t1);
  QDP_destroy_M(t2);
  info->final_sec = QOP_time() - dtime;
  info->final_flop = nflops*QDP_sites_on_node; 
  info->status = QOP_SUCCESS;
#undef NC
}
예제 #24
0
/* Construct the Asqtad fat links fl[0..3] and long links ll[0..3] from
   the gauge field gf[0..3].

   fl, ll   output arrays; every entry is created here with
            QDP_create_M, so any pointer passed in is overwritten
   gf       input gauge links
   coeffs   path weights (one_link, naik, three_staple, five_staple,
            seven_staple, lepage)

   The fat link starts as (one_link - 6*lepage) * gf and receives the
   staple contributions through compute_gen_staple.  The long link is
   naik times the product of three consecutive links in one direction. */
static void 
create_fn_links_qdp(QDP_ColorMatrix *fl[], QDP_ColorMatrix *ll[],
		    QDP_ColorMatrix *gf[], asqtad_path_coeff *coeffs)
{
  QDP_ColorMatrix *stap, *stap5;
  QLA_Real w_link, w_naik;
  int mu, d1, d2, d3;
  double t_begin, t_fat_done, t_end;
  double dtimefl, dtimell;
#ifdef LLTIME
  double nflopfl = 61632;
  double nflopll = 1804;
#endif

  /* output fields first, then the two work matrices */
  for(mu=0; mu<4; mu++) {
    fl[mu] = QDP_create_M();
    ll[mu] = QDP_create_M();
  }
  stap = QDP_create_M();
  stap5 = QDP_create_M();

  t_begin = dclock();

  /* the Lepage term is folded in below, so correct the one-link weight */
  w_link = coeffs->one_link - 6.0*coeffs->lepage;

  /* ---- fat links ---- */
  for(mu=0; mu<4; mu++) {
    QDP_M_eq_r_times_M(fl[mu], &w_link, gf[mu], QDP_all);

    for(d1=0; d1<4; d1++) {
      if(d1==mu) continue;

      /* three-link staple, kept in stap for the higher staples */
      compute_gen_staple(stap, mu, d1, gf[mu],
			 (double)coeffs->three_staple, gf, fl);
      /* Lepage term: staple of the staple in the same direction */
      compute_gen_staple(NULL, mu, d1, stap, coeffs->lepage, gf, fl);

      for(d2=0; d2<4; d2++) {
	if(d2==mu || d2==d1) continue;

	/* five-link staple, kept in stap5 for the seven-link one */
	compute_gen_staple(stap5, mu, d2, stap,
			   (double)coeffs->five_staple, gf, fl);

	for(d3=0; d3<4; d3++) {
	  if(d3==mu || d3==d1 || d3==d2) continue;
	  compute_gen_staple(NULL, mu, d3, stap5,
			     (double)coeffs->seven_staple, gf, fl);
	}
      }
    }
  }

  t_fat_done = dclock();
  dtimefl = t_fat_done - t_begin;
#ifdef LLTIME
  node0_printf("LLTIME(Fat): time = %e (Asqtad opt) mflops = %e\n",dtimefl,
         (Real)nflopfl*volume/(1e6*dtimefl*numnodes()) );
#endif

  /* ---- long links ---- */
  w_naik = coeffs->naik;
  for(mu=0; mu<4; mu++) {
    /* stap5 <- U(x) U(x+mu) */
    QDP_M_eq_sM(stap, gf[mu], QDP_neighbor[mu], QDP_forward, QDP_all);
    QDP_M_eq_M_times_M(stap5, gf[mu], stap, QDP_all);
    QDP_discard_M(stap);
    /* ll <- naik * U(x) * stap5(x+mu) */
    QDP_M_eq_sM(stap, stap5, QDP_neighbor[mu], QDP_forward, QDP_all);
    QDP_M_eq_M_times_M(ll[mu], gf[mu], stap, QDP_all);
    QDP_M_eq_r_times_M(ll[mu], &w_naik, ll[mu], QDP_all);
  }

  t_end = dclock();
  dtimell = t_end - t_fat_done;
#ifdef LLTIME
  node0_printf("LLTIME(long): time = %e (Asqtad opt) mflops = %e\n",dtimell,
         (Real)nflopll*volume/(1e6*dtimell*numnodes()) );
#endif

  QDP_destroy_M(stap);
  QDP_destroy_M(stap5);
}
예제 #25
0
/* Asqtad fermion force for nsrc color-vector sources.
 *
 *   info   - receives final_sec, final_flop and status on return
 *   links  - gauge links, one per direction (used as the forward links)
 *   force  - accumulated in place: the routine works directly on force[mu]
 *            (via tempmom_qdp[]) and finally replaces each force[mu] with
 *            the QDP_M_eq_antiherm_M projection of the accumulated matrix
 *   coef   - asqtad path coefficients (one_link, naik, three_staple,
 *            five_staple, seven_staple, lepage)
 *   eps    - per-source weight; every path coefficient is multiplied by
 *            eps[i] for source i
 *   xin    - the nsrc input color vectors
 *   nsrc   - number of sources; also sizes every VLA declared below
 *
 * Directions run over 0..7; OPP_DIR(), GOES_FORWARDS() and
 * GOES_BACKWARDS() classify them.  The names fbshift, fbshiftdir, fblink,
 * tv and tempmom_qdp are not declared in the visible body of this
 * function (presumably file scope or supplied by ASQTAD_FORCE_BEGIN --
 * TODO confirm).
 *
 * NOTE(review): the alias macros P5nu, Pmumu, Pmumutmp, P5sig, P5sigtmp,
 * P3mu, Popmu and Pmumumu defined inside the body are not #undef'd here,
 * so they stay defined for whatever follows in the translation unit.
 */
void
QOP_asqtad_force_multi_asvec_qdp(QOP_info_t *info, QDP_ColorMatrix *links[],
				 QDP_ColorMatrix *force[], QOP_asqtad_coeffs_t *coef,
				 REAL eps[], QDP_ColorVector *xin[], int nsrc)
{
#define NC QDP_get_nc(xin[0])
  /* Per-source coefficients (path coefficient times eps[i]) and their
     negatives; all of these are VLAs of length nsrc */
  REAL coeff[nsrc];
  REAL OneLink[nsrc], Lepage[nsrc], Naik[nsrc], FiveSt[nsrc], ThreeSt[nsrc], SevenSt[nsrc];
  REAL mNaik[nsrc], mLepage[nsrc], mFiveSt[nsrc], mThreeSt[nsrc], mSevenSt[nsrc];

  QDP_ColorVector *P3[8][nsrc];

  /* P5 and P5tmp only hold pointers: in the active "#if 1" configuration
     they are never allocated themselves but are re-pointed, inside the nu
     loop, at entries of the smaller P5s / P5tmps pools */
  QDP_ColorVector *P5[8][nsrc];
  QDP_ColorVector *P5tmp[8][8][nsrc];
  QDP_ColorVector *P5s[4][nsrc];
  QDP_ColorVector *P5tmps[4][8][nsrc];

  //QDP_ColorVector *xin[nsrc];
  /* The *tmp[8][nsrc] arrays are passed as the last argument of
     u_shift_color_vecs, one set per shift direction */
  QDP_ColorVector *xintmp[8][nsrc];
  QDP_ColorVector *Pmu[nsrc];
  QDP_ColorVector *Pmutmp[8][nsrc];
  QDP_ColorVector *Pnumu[nsrc];
  QDP_ColorVector *Pnumutmp[8][nsrc];
  QDP_ColorVector *Prhonumu[nsrc];
  QDP_ColorVector *Prhonumutmp[8][nsrc];
  QDP_ColorVector *P7[nsrc];
  QDP_ColorVector *P7tmp[8][nsrc];
  QDP_ColorVector *P7rho[nsrc];
  QDP_ColorVector *ttv[nsrc];

  int i, dir;
  int mu, nu, rho, sig;

  /* Reported flop count per site: nflop1 for one source, growing linearly
     by (nflop2-nflop1) for each additional source */
  double nflop1 = 253935;
  double nflop2 = 433968;
  double nflop = nflop1 + (nflop2-nflop1)*(nsrc-1);
  double dtime;
  dtime = -QOP_time();

  ASQTAD_FORCE_BEGIN;

  QOP_trace("test 1\n");
  /* setup parallel transport */
  /* Forward slot i uses links[i] directly; the opposite slot gets a newly
     created matrix holding the adjoint of the backward-shifted link.
     Only these opposite-direction matrices are owned by this routine. */
  QDP_ColorMatrix *tmpmat = QDP_create_M();
  for(i=0; i<QOP_common.ndim; i++) {
    fbshift[i] = QDP_neighbor[i];
    fbshiftdir[i] = QDP_forward;
    fblink[i] = links[i];
    fbshift[OPP_DIR(i)] = QDP_neighbor[i];
    fbshiftdir[OPP_DIR(i)] = QDP_backward;
    fblink[OPP_DIR(i)] = QDP_create_M();
    QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all);
    QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all);
  }

  /* Point the externally declared tv at local storage and fill it */
  tv = ttv;
  for(i=0; i<nsrc; i++) {
    tv[i] = QDP_create_V();
  }

  QOP_trace("test 2\n");
  /* Allocate temporary vectors */
  for(i=0; i<nsrc; i++) {
    Pmu[i] = QDP_create_V();
    Pnumu[i] = QDP_create_V();
    Prhonumu[i] = QDP_create_V();
    P7[i] = QDP_create_V();
    P7rho[i] = QDP_create_V();
    for(dir=0; dir<8; dir++) {
      xintmp[dir][i] = QDP_create_V();
      Pmutmp[dir][i] = QDP_create_V();
      Pnumutmp[dir][i] = QDP_create_V();
      Prhonumutmp[dir][i] = QDP_create_V();
      P7tmp[dir][i] = QDP_create_V();
    }
#if 1
    /* Active configuration: allocate a pool of 4 five-link vector sets
       instead of one per direction */
    for(mu=0; mu<4; mu++) {
      P5s[mu][i] = QDP_create_V();
      for(dir=0; dir<8; dir++) {
	P5tmps[mu][dir][i] = QDP_create_V();
      }
    }
#else
    for(mu=0; mu<8; mu++) {
      P5[mu][i] = QDP_create_V();
      for(dir=0; dir<8; dir++) {
	P5tmp[mu][dir][i] = QDP_create_V();
	//printf("%p %p\n", P5tmp[mu][dir][i], &(P5tmp[mu][dir][i])); fflush(stdout);
	if(P5tmp[mu][dir][i]==NULL) {
	  fprintf(stderr, "error: can't create V\n");
	  QDP_abort();
	}
      }
    }
#endif
  }
  //printf("%p\n", P5tmp[0][4][0]); fflush(stdout);

  for(mu=0; mu<8; mu++) {
    for(i=0; i<nsrc; i++) {
      P3[mu][i] = QDP_create_V();
      //P5[mu][i] = QDP_create_V();
    }
  }

  /* Accumulate straight into the caller's force matrices.  The sign on
     odd sites is flipped here and flipped back after the main loop. */
  for(mu=0; mu<4; mu++) {
    tempmom_qdp[mu] = force[mu];
    QDP_M_eqm_M(tempmom_qdp[mu], tempmom_qdp[mu], QDP_odd);
  }

  /* Path coefficients times fermion epsilon */
  /* Load path coefficients from table */
  for(i=0; i<nsrc; i++) {
    OneLink[i] = coef->one_link     * eps[i];
    Naik[i]    = coef->naik         * eps[i]; mNaik[i]    = -Naik[i];
    ThreeSt[i] = coef->three_staple * eps[i]; mThreeSt[i] = -ThreeSt[i];
    FiveSt[i]  = coef->five_staple  * eps[i]; mFiveSt[i]  = -FiveSt[i];
    SevenSt[i] = coef->seven_staple * eps[i]; mSevenSt[i] = -SevenSt[i];
    Lepage[i]  = coef->lepage       * eps[i]; mLepage[i]  = -Lepage[i];
  }

#if 0
  printf("nsrc = %i\n", nsrc);
  printf("coeffs = %g %g %g %g %g %g\n", OneLink[0], ThreeSt[0], FiveSt[0],
	 SevenSt[0], Lepage[0], Naik[0]);
#endif

  /* *************************************** */

  QOP_trace("start force loop\n");
  for(mu=0; mu<8; mu++) {
    //u_shift_hw_fermion(temp_x_qdp, Pmu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)]);
    u_shift_color_vecs(xin, Pmu, OPP_DIR(mu), nsrc, xintmp[OPP_DIR(mu)]);

    for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) {
      //u_shift_hw_fermion(Pmu, P3[sig], sig, temp_hw[sig]);
      u_shift_color_vecs(Pmu, P3[sig], sig, nsrc, Pmutmp[sig]);

      if(GOES_FORWARDS(sig)) {
	/* Add the force F_sig[x+mu]:         x--+             *
	 *                                   |   |             *
	 *                                   o   o             *
	 * the 1 link in the path: - (numbering starts form 0) */
	add_forces_to_mom(P3[sig], Pmu, sig, mThreeSt, nsrc);
      }
    }

    for(nu=0; nu<8; nu++) if( (nu!=mu)&&(nu!=OPP_DIR(mu)) ) {
      /* nP5 indexes the P5s/P5tmps pool; it counts the sig values that
         pass the filter below, so it is reset for every nu */
      int nP5 = 0;
      //Pnumu = hw_qdp[OPP_DIR(nu)];
      //u_shift_hw_fermion(Pmu, Pnumu, OPP_DIR(nu), temp_hw[OPP_DIR(nu)]);
      u_shift_color_vecs(Pmu, Pnumu, OPP_DIR(nu), nsrc, Pmutmp[OPP_DIR(nu)]);
      //QDP_V_veq_V(Pnumu, P3[OPP_DIR(nu)], QDP_all, nsrc);
      for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) &&
				   (sig!=nu)&&(sig!=OPP_DIR(nu)) ) {
#if 1
	/* Bind P5[sig] and P5tmp[sig][*] to the next pool slot */
	for(i=0; i<nsrc; i++) {
	  P5[sig][i] = P5s[nP5][i];
	  for(dir=0; dir<8; dir++) P5tmp[sig][dir][i] = P5tmps[nP5][dir][i];
	}
#endif
	nP5++;
	//u_shift_hw_fermion(Pnumu, P5[sig], sig, temp_hw[sig]);
	u_shift_color_vecs(Pnumu, P5[sig], sig, nsrc, Pnumutmp[sig]);

	if(GOES_FORWARDS(sig)) {
	  /* Add the force F_sig[x+mu+nu]:      x--+             *
	   *                                   |   |             *
	   *                                   o   o             *
	   * the 2 link in the path: + (numbering starts form 0) */
	  add_forces_to_mom(P5[sig], Pnumu, sig, FiveSt, nsrc);
	}
      }
      QOP_trace("test 4\n");
      for(rho=0; rho<8; rho++) if( (rho!=mu)&&(rho!=OPP_DIR(mu)) &&
				   (rho!=nu)&&(rho!=OPP_DIR(nu)) ) {
	//Prhonumu = hw_qdp[OPP_DIR(rho)];
	//u_shift_hw_fermion(Pnumu, Prhonumu, OPP_DIR(rho), 
	//		 temp_hw[OPP_DIR(rho)] );
	  u_shift_color_vecs(Pnumu, Prhonumu, OPP_DIR(rho), nsrc,
			     Pnumutmp[OPP_DIR(rho)]);
	  //QDP_V_veq_V(Prhonumu, P5[OPP_DIR(rho)], QDP_all, nsrc);
	for(sig=0; sig<8; sig++) if( (sig!=mu )&&(sig!=OPP_DIR(mu )) &&
				     (sig!=nu )&&(sig!=OPP_DIR(nu )) &&
				     (sig!=rho)&&(sig!=OPP_DIR(rho)) ) {
	  /* Length 7 paths */
	  //P7 = hw_qdp[sig];
	  //u_shift_hw_fermion(Prhonumu, P7, sig, temp_hw[sig] );
  QOP_trace("test 43\n");
	  u_shift_color_vecs(Prhonumu, P7, sig, nsrc, Prhonumutmp[sig]);
  QOP_trace("test 44\n");
	  //QDP_V_eq_r_times_V(P7[0], &SevenSt[0], P7[0], QDP_all);
	  //QDP_V_eq_r_times_V(P7[1], &SevenSt[1], P7[1], QDP_all);
	  if(GOES_FORWARDS(sig)) {
	    /* Add the force F_sig[x+mu+nu+rho]:  x--+             *
	     *                                   |   |             *
	     *                                   o   o             *
	     * the 3 link in the path: - (numbering starts form 0) */
  QOP_trace("test 45\n");
	    add_forces_to_mom(P7, Prhonumu, sig, mSevenSt, nsrc);
  QOP_trace("test 46\n");
	    //mom_meq_force(P7, Prhonumu, sig);
	  }
	  /* Add the force F_rho the 2(4) link in the path: +     */
	  //P7rho = hw_qdp[rho];
	  //u_shift_hw_fermion(P7, P7rho, rho, temp_hw[rho]);
  QOP_trace("test 47\n");
	  u_shift_color_vecs(P7, P7rho, rho, nsrc, P7tmp[rho]);
  QOP_trace("test 48\n");
	  side_link_forces(rho,sig,SevenSt,Pnumu,P7,Prhonumu,P7rho, nsrc);
  QOP_trace("test 49\n");
	  //side_link_3f_force2(rho,sig,Pnumu,P7,Prhonumu,P7rho);
	  /* Add the P7rho vector to P5 */
	  /* The weight is the ratio SevenSt/FiveSt, or 0 when FiveSt is 0
	     (avoids dividing by zero) */
	  for(i=0; i<nsrc; i++) {
	    if(FiveSt[i]!=0) coeff[i] = SevenSt[i]/FiveSt[i];
	    else coeff[i] = 0;
  QOP_trace("test 410\n");
	    QDP_V_peq_r_times_V(P5[sig][i], &coeff[i], P7rho[i], QDP_all);
  QOP_trace("test 411\n");
	  }
	} /* sig */
      } /* rho */
  QOP_trace("test 5\n");
      /* From here on P7 is reused as the P5nu work vector */
#define P5nu P7
      for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) &&
				   (sig!=nu)&&(sig!=OPP_DIR(nu)) ) {
	/* Length 5 paths */
	/* Add the force F_nu the 1(3) link in the path: -     */
	//P5nu = hw_qdp[nu];
	//u_shift_hw_fermion(P5[sig], P5nu, nu, temp_hw[nu]);
	u_shift_color_vecs(P5[sig], P5nu, nu, nsrc, P5tmp[sig][nu]);
	side_link_forces(nu, sig, mFiveSt, Pmu, P5[sig], Pnumu, P5nu, nsrc);
	/* Add the P5nu vector to P3 */
	/* Weight FiveSt/ThreeSt, or 0 when ThreeSt is 0 */
	for(i=0; i<nsrc; i++) {
	  if(ThreeSt[i]!=0) coeff[i] = FiveSt[i]/ThreeSt[i]; 
	  else coeff[i] = 0;
	  QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all);
	}
      } /* sig */
    } /* nu */

    /* Work-vector aliases for the Lepage, three-link, one-link and Naik
       terms below; note that P3mu, Popmu and Pmumumu all name P7 */
#define Pmumu Pnumu
#define Pmumutmp Pnumutmp
#define P5sig Prhonumu
#define P5sigtmp Prhonumutmp
#define P3mu P7
#define Popmu P7
#define Pmumumu P7
    /* Now the Lepage term... It is the same as 5-link paths with
       nu=mu and FiveSt=Lepage. */
    //u_shift_hw_fermion(Pmu, Pmumu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)] );
    u_shift_color_vecs(Pmu, Pmumu, OPP_DIR(mu), nsrc, Pmutmp[OPP_DIR(mu)]);

    for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) {
      //P5sig = hw_qdp[sig];
      //u_shift_hw_fermion(Pmumu, P5sig, sig, temp_hw[sig]);
      u_shift_color_vecs(Pmumu, P5sig, sig, nsrc, Pmumutmp[sig]);
      if(GOES_FORWARDS(sig)) {
	/* Add the force F_sig[x+mu+nu]:      x--+             *
	 *                                   |   |             *
	 *                                   o   o             *
	 * the 2 link in the path: + (numbering starts form 0) */
	add_forces_to_mom(P5sig, Pmumu, sig, Lepage, nsrc);
      }
      /* Add the force F_nu the 1(3) link in the path: -     */
      //P5nu = hw_qdp[mu];
      //u_shift_hw_fermion(P5sig, P5nu, mu, temp_hw[mu]);
      u_shift_color_vecs(P5sig, P5nu, mu, nsrc, P5sigtmp[mu]);
      side_link_forces(mu, sig, mLepage, Pmu, P5sig, Pmumu, P5nu, nsrc);
      /* Add the P5nu vector to P3 */
      /* Weight Lepage/ThreeSt, or 0 when ThreeSt is 0 */
      for(i=0; i<nsrc; i++) {
	if(ThreeSt[i]!=0) coeff[i] = Lepage[i]/ThreeSt[i];
	else coeff[i] = 0;
	QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all);
      }

      /* Length 3 paths (Not the Naik term) */
      /* Add the force F_mu the 0(2) link in the path: +     */
      if(GOES_FORWARDS(mu)) {
	//P3mu = hw_qdp[mu];  /* OK to clobber P5nu */
	//u_shift_hw_fermion(P3[sig], P3mu, mu, temp_hw[mu]);
	//u_shift_color_vecs(P3[sig], P3mu, mu, 2, temp_hw[mu]);
	/* P3[sig] is first copied into P5sig and the copy is shifted */
	for(i=0; i<nsrc; i++) {
	  QDP_V_eq_V(P5sig[i], P3[sig][i], QDP_all);
	}
	u_shift_color_vecs(P5sig, P3mu, mu, nsrc, P5sigtmp[mu]);
      }
      /* The above shift is not needed if mu is backwards */
      side_link_forces(mu, sig, ThreeSt, xin, P3[sig], Pmu, P3mu, nsrc);
    }

    /* Finally the OneLink and the Naik term */
    if(GOES_BACKWARDS(mu)) {
      /* Do only the forward terms in the Dslash */
      /* Because I have shifted with OPP_DIR(mu) Pmu is a forward *
       * shift.                                                   */
      /* The one link */
      add_forces_to_mom(Pmu, xin, OPP_DIR(mu), OneLink, nsrc);
      /* For the same reason Pmumu is the forward double link */

      /* Popmu is a backward shift */
      //Popmu = hw_qdp[mu]; /* OK to clobber P3mu */
      //u_shift_hw_fermion(xin, Popmu, mu, temp_hw[mu]);
      u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]);
      /* The Naik */
      /* link no 1: - */
      add_forces_to_mom(Pmumu, Popmu, OPP_DIR(mu), mNaik, nsrc);
      /* Pmumumu can overwrite Popmu which is no longer needed */
      //Pmumumu = hw_qdp[OPP_DIR(mu)];
      //u_shift_hw_fermion(Pmumu, Pmumumu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)]);
      u_shift_color_vecs(Pmumu, Pmumumu, OPP_DIR(mu), nsrc, Pmumutmp[OPP_DIR(mu)]);
      /* link no 0: + */
      add_forces_to_mom(Pmumumu, xin, OPP_DIR(mu), Naik, nsrc);
    } else {
      /* The rest of the Naik terms */
      //Popmu = hw_qdp[mu]; /* OK to clobber P3mu */
      //u_shift_hw_fermion(xin, Popmu, mu, temp_hw[mu]);
      u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]);
      /* link no 2: + */
      /* Pmumu is double backward shift */
      add_forces_to_mom(Popmu, Pmumu, mu, Naik, nsrc);
    }
    /* Here we have to do together the Naik term and the one link term */

  }/* mu */
  QOP_trace("test 6\n");
  QOP_trace("test 7\n");

  /* Undo the odd-site sign flip applied before the loop, then store the
     QDP_M_eq_antiherm_M projection back into the force matrices */
  for(mu=0; mu<4; mu++) {
    QDP_M_eq_M(tmpmat, tempmom_qdp[mu], QDP_even);
    QDP_M_eqm_M(tmpmat, tempmom_qdp[mu], QDP_odd);
    QDP_M_eq_antiherm_M(tempmom_qdp[mu], tmpmat, QDP_all);
  }
  QDP_destroy_M(tmpmat);

  //printf("%p\n", P5tmp[0][4][0]); fflush(stdout);
  //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
  /* Free temporary vectors */
  /* P5/P5tmp are not destroyed: they only alias the P5s/P5tmps pool,
     which is released below */
  for(i=0; i<nsrc; i++) {
    QDP_destroy_V(Pmu[i]);
    QDP_destroy_V(Pnumu[i]);
    QDP_destroy_V(Prhonumu[i]);
    QDP_destroy_V(P7[i]);
    QDP_destroy_V(P7rho[i]);
    //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
    for(dir=0; dir<8; dir++) {
      QDP_destroy_V(xintmp[dir][i]);
      QDP_destroy_V(Pmutmp[dir][i]);
      QDP_destroy_V(Pnumutmp[dir][i]);
      QDP_destroy_V(Prhonumutmp[dir][i]);
      QDP_destroy_V(P7tmp[dir][i]);
    }
    //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
    for(mu=0; mu<4; mu++) {
      //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
      QDP_destroy_V(P5s[mu][i]);
      //QDP_destroy_V(P5[mu][i]);
      //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
      for(dir=0; dir<8; dir++) {
	//if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
	QDP_destroy_V(P5tmps[mu][dir][i]);
	//printf("%p\n", P5tmp[mu][dir][i]); fflush(stdout);
	//QDP_destroy_V(P5tmp[mu][dir][i]);
	//if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
      }
      //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
    }
    //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
  }

  //if(QDP_this_node==0) { printf("here3\n"); fflush(stdout); }
  for(mu=0; mu<8; mu++) {
    for(i=0; i<nsrc; i++) {
      QDP_destroy_V(P3[mu][i]);
    }
    //QDP_destroy_V(P5[mu][0]);
    //QDP_destroy_V(P5[mu][1]);
  }

  for(i=0; i<nsrc; i++) {
    QDP_destroy_V(tv[i]);
  }

  //if(QDP_this_node==0) { printf("here4\n"); fflush(stdout); }
  /* Release the link matrices created in the setup loop; slots 0..3 hold
     the caller's links[].  NOTE(review): assumes OPP_DIR maps 0..3 onto
     4..7 and that QOP_common.ndim is 4 -- TODO confirm */
  for(i=4; i<8; i++) {
    QDP_destroy_M(fblink[i]);
  }

  dtime += QOP_time();
  info->final_sec = dtime;
  info->final_flop = nflop*QDP_sites_on_node;
  info->status = QOP_SUCCESS;

  ASQTAD_FORCE_END;
#undef NC
}
예제 #26
0
/* Driver for the HISQ fermion force ("fnmat" variant).
 *
 *   info          - receives final_sec, final_flop (sum of the flops
 *                   reported by every stage plus the local ones counted
 *                   here) and status
 *   flh           - supplies the U_links, V_links and W_unitlinks fields
 *   Force         - the antihermitian projection of the computed force is
 *                   added to Force->force[0..3]
 *   hisq_coeff    - path coefficients; eps_naik, n_naiks and umethod are
 *                   read here
 *   residues, x   - per-term residues and color vectors, laid out
 *                   contiguously in Naik-set order
 *   n_orders_naik - number of terms in each of the n_naiks sets
 *
 * On the first call the three quark-path lists are sorted and cached,
 * together with their net-back-direction tables, in function-static
 * arrays.  Nothing in this routine frees those arrays.
 *
 * Exits the process if the path tables cannot be allocated or if the
 * unitarization method is neither QOP_UNITARIZE_NONE nor
 * QOP_UNITARIZE_RATIONAL.
 */
void 
QOP_hisq_force_multi_wrapper_fnmat(QOP_info_t *info,  
				   QOP_FermionLinksHisq *flh,
				   QOP_Force *Force, 
				   QOP_hisq_coeffs_t *hisq_coeff,
				   REAL *residues,
				   QDP_ColorVector *x[], 
				   int *n_orders_naik)
  
{
  double dtime = QDP_time();
  int i, ipath, dir;
  REAL coeff_mult;

  double *eps_naik = hisq_coeff->eps_naik;
  int n_naiks = hisq_coeff->n_naiks;
  QOP_hisq_unitarize_method_t umethod = hisq_coeff->umethod;

  // Quark paths sorted by net displacement and last directions
  static Q_path *q_paths_sorted_1 = NULL;
  static Q_path *q_paths_sorted_2 = NULL;
  static Q_path *q_paths_sorted_3 = NULL;

  static int *netbackdir_table_1 = NULL;
  static int *netbackdir_table_2 = NULL;
  static int *netbackdir_table_3 = NULL;

  static int first_force = 1;

  if(first_force == 1) 
    QOP_make_paths_and_dirs_hisq(hisq_coeff, umethod);

  int num_q_paths_1 = qop_get_num_q_paths_1();
  int num_q_paths_2 = qop_get_num_q_paths_2();
  int num_q_paths_3 = qop_get_num_q_paths_3();

  Q_path *q_paths_1 = qop_get_q_paths_1();
  Q_path *q_paths_2 = qop_get_q_paths_2();
  Q_path *q_paths_3 = qop_get_q_paths_3();

  Q_path *q_paths_sorted_current = NULL;
  int *netbackdir_table_current = NULL;

  int inaik;
  int n_naik_shift;
  double final_flop = 0.;
  size_t nflops = 0;   /* per-site flops of the operations done locally */

  QDP_ColorMatrix * force[4] =  {Force->force[0], Force->force[1], 
				 Force->force[2], Force->force[3]};

  /* Only assigned and used inside the Naik loop; initialized so they
     never hold an indeterminate value */
  int num_q_paths_current = 0;
  int n_orders_naik_current = 0;//==nterms


  QDP_ColorMatrix *force_accum_0[4];
  QDP_ColorMatrix *force_accum_0_naik[4];
  QDP_ColorMatrix *force_accum_1[4];
  QDP_ColorMatrix *force_accum_1u[4];
  QDP_ColorMatrix *force_accum_2[4];
  QDP_ColorMatrix *force_final[4];


  QDP_ColorMatrix *Ugf[4], *Vgf[4], *Wgf[4];

  int nterms = 0, n_order_naik_total;

  for(inaik = 0; inaik < n_naiks; inaik++)
    nterms += n_orders_naik[inaik];
  n_order_naik_total = nterms;

  for(i=0;i<4;i++) {
    Ugf[i] = flh->U_links[i];
    Vgf[i] = flh->V_links[i];
    Wgf[i] = flh->W_unitlinks[i];
  }

  QDP_ColorMatrix *tmat;
  QDP_ColorMatrix *mat_tmp0;

  REAL treal;

  if( first_force==1 ){
    if( q_paths_sorted_1==NULL ) 
      q_paths_sorted_1 = (Q_path *)malloc( num_q_paths_1*sizeof(Q_path) );
    if( netbackdir_table_1==NULL ) 
      netbackdir_table_1 = (int *)malloc( num_q_paths_1*sizeof(int) );
    if( q_paths_sorted_2==NULL ) 
      q_paths_sorted_2 = (Q_path *)malloc( num_q_paths_2*sizeof(Q_path) );
    if( netbackdir_table_2==NULL ) 
      netbackdir_table_2 = (int *)malloc( num_q_paths_2*sizeof(int) );
    if( q_paths_sorted_3==NULL ) 
      q_paths_sorted_3 = (Q_path *)malloc( num_q_paths_3*sizeof(Q_path) );
    /* The else below belongs to this last test only; braces make that
       explicit.  NOTE(review): it exits with status 0 although the
       situation looks like an error -- confirm the intended status. */
    if( netbackdir_table_3==NULL ) {
      netbackdir_table_3 = (int *)malloc( num_q_paths_3*sizeof(int) );
    }
    else{QOP_printf0("WARNING: remaking sorted path tables\n"); exit(0); }

    /* The tables are written through immediately below, so a failed
       allocation must not go unnoticed.  malloc may legitimately return
       NULL for a zero-size request, hence the count guards. */
    if( (num_q_paths_1>0 &&
	 (q_paths_sorted_1==NULL || netbackdir_table_1==NULL)) ||
	(num_q_paths_2>0 &&
	 (q_paths_sorted_2==NULL || netbackdir_table_2==NULL)) ||
	(num_q_paths_3>0 &&
	 (q_paths_sorted_3==NULL || netbackdir_table_3==NULL)) ) {
      QOP_printf0("QOP_hisq_force_multi_wrapper_fnmat: can't allocate sorted path tables\n");
      exit(1);
    }

    // make sorted tables
    sort_quark_paths_hisq( q_paths_1, q_paths_sorted_1, num_q_paths_1, 8 );

    for( ipath=0; ipath<num_q_paths_1; ipath++ )
      netbackdir_table_1[ipath] = 
	find_backwards_gather( &(q_paths_sorted_1[ipath]) );

    sort_quark_paths_hisq( q_paths_2, q_paths_sorted_2, num_q_paths_2, 16 );

    for( ipath=0; ipath<num_q_paths_2; ipath++ )
      netbackdir_table_2[ipath] = 
	find_backwards_gather( &(q_paths_sorted_2[ipath]) );

    sort_quark_paths_hisq( q_paths_3, q_paths_sorted_3, num_q_paths_3, 16 );

    for( ipath=0; ipath<num_q_paths_3; ipath++ )
      netbackdir_table_3[ipath] = 
	find_backwards_gather( &(q_paths_sorted_3[ipath]) );

    first_force=0;
  }

  tmat = QDP_create_M();
  mat_tmp0 = QDP_create_M();

  for(i=XUP;i<=TUP;i++){
     force_accum_0[i] = QDP_create_M();
     force_accum_0_naik[i] = QDP_create_M();
     force_accum_1[i] = QDP_create_M();
     force_accum_1u[i] = QDP_create_M();
     force_accum_2[i] = QDP_create_M();
     force_final[i] = QDP_create_M();
  }


  for(dir=XUP;dir<=TUP;dir++)
    QDP_M_eq_zero(force_accum_2[dir], QDP_all);


  // loop on different naik masses
  n_naik_shift = 0;


  for( inaik=0; inaik<n_naiks; inaik++ ) {

    // smearing level 0
    // The first set is run over ALL terms; later sets only over their own
    if( 0==inaik ) {
      n_orders_naik_current = n_order_naik_total;
    }
    else {
      n_orders_naik_current = n_orders_naik[inaik];
    }
    

    QOP_hisq_force_multi_smearing0_fnmat(info,residues+n_naik_shift, 
					 x+n_naik_shift, n_orders_naik_current,
					 force_accum_0, force_accum_0_naik);
    final_flop += info->final_flop;
 
    
    // smearing level 2
    // Path set 2 for the first set, path set 3 for every later one
    if( 0==inaik ) {
      q_paths_sorted_current = q_paths_sorted_2;
      num_q_paths_current = num_q_paths_2;
      netbackdir_table_current = netbackdir_table_2;
    }
    else {
      q_paths_sorted_current = q_paths_sorted_3;
      num_q_paths_current = num_q_paths_3;
      netbackdir_table_current = netbackdir_table_3;
    }
    
    QOP_hisq_force_multi_smearing_fnmat( info,Wgf,residues+n_naik_shift, 
					 x+n_naik_shift, 
					 n_orders_naik_current, 
					 force_accum_1, 
					 force_accum_0, force_accum_0_naik, 
					 num_q_paths_current, 
					 q_paths_sorted_current, 
					 netbackdir_table_current );
    //QOP_printf0("HISQ smear0 flops = %g\n", info->final_flop);
    final_flop += info->final_flop;

    // Weight of this set in the accumulated level-2 force
    if( 0==inaik ) {
      coeff_mult = 1.0;
    }
    else {
      coeff_mult = eps_naik[inaik];
    }
    
    
    for(dir=XUP;dir<=TUP;dir++) {
      QDP_M_peq_r_times_M(force_accum_2[dir],&coeff_mult,
			  force_accum_1[dir],QDP_all);
      nflops += 36;
    }
    n_naik_shift += n_orders_naik[inaik];


  }

 

  if ( umethod==QOP_UNITARIZE_NONE ){

    // smearing level 1
    QOP_hisq_force_multi_smearing_fnmat( info,Ugf,residues, 
					 x, 
					 nterms, force_accum_1, 
					 force_accum_2, NULL, 
					 num_q_paths_1, 
					 q_paths_sorted_1, 
					 netbackdir_table_1 );
    final_flop += info->final_flop;
    
  }
  else if ( umethod==QOP_UNITARIZE_RATIONAL ){

    
    // reunitarization
    QOP_hisq_force_multi_reunit(info,Vgf,force_accum_1u,
				force_accum_2);
    //QOP_printf0("reunit flops = %g\n", info->final_flop);
    final_flop += info->final_flop;
    
    // smearing level 1
    QOP_hisq_force_multi_smearing_fnmat( info,Ugf,residues, 
					 x, 
					 nterms, force_accum_1, 
					 force_accum_1u, NULL, 
					 num_q_paths_1, 
					 q_paths_sorted_1, 
					 netbackdir_table_1 );
    //QOP_printf0("HISQ smear1 flops = %g\n", info->final_flop);
    final_flop += info->final_flop;
  }
  else
    {
      QOP_printf0("Unknown or unsupported unitarization method\n");
      exit(1);
      
    }


  // contraction with the link in question should be done here,
  // after contributions from all levels of smearing are taken into account

  for(dir=XUP;dir<=TUP;dir++){

    QDP_M_eq_M_times_M(force_final[dir],Ugf[dir],force_accum_1[dir],QDP_all);
    nflops += 198;

  }



  // take into account even/odd parity (it is NOT done in "smearing" routine)
  //eps multiplication done outside QOP 
  // i.e. scale by +2 on even sites and by -2 on odd sites

  for(dir=XUP;dir<=TUP;dir++){
    QDP_M_eq_M(tmat,force_final[dir],QDP_all);

    treal = 2.0;
    QDP_M_eq_r_times_M(force_final[dir],&treal,tmat,QDP_even);

    treal = -2.0;
    QDP_M_eq_r_times_M(force_final[dir],&treal,tmat,QDP_odd);
    nflops += 18;

  }


  // Put antihermitian traceless part into momentum 
  // add force to momentum

  for(dir=XUP; dir<=TUP; dir++){

    QDP_M_eq_antiherm_M(mat_tmp0, force_final[dir], QDP_all);
    QDP_M_peq_M(force[dir], mat_tmp0, QDP_all);
    nflops += 24+18;
    //QDP_M_peq_M(force_final[dir], force[dir], QDP_all);
    //QDP_M_eq_antiherm_M(force[dir], force_final[dir], QDP_all);

  }



  for(i=XUP;i<=TUP;i++){
     QDP_destroy_M( force_accum_0[i] );
     QDP_destroy_M( force_accum_0_naik[i] );
     QDP_destroy_M( force_accum_1[i] );
     QDP_destroy_M( force_accum_1u[i] );
     QDP_destroy_M( force_accum_2[i] );
     QDP_destroy_M( force_final[i] );
  }

  QDP_destroy_M( tmat );
  QDP_destroy_M( mat_tmp0 );

  // nflops is per site; convert to this node's total before adding
  final_flop += ((double)nflops)*QDP_sites_on_node;

  info->final_sec = QDP_time() - dtime;
  info->final_flop = final_flop;
  info->status = QOP_SUCCESS;
  //QOP_printf0("HISQ force flops = %g\n", info->final_flop);
} //hisq_force_multi_wrapper_fnmat