コード例 #1
0
ファイル: gauge.c プロジェクト: erinaldi/qhmc
/*
 * Lua binding: zero every link matrix of a gauge field.
 *
 * Expects exactly one value on the Lua stack (checked with qassert), which
 * qopqdp_gauge_check() resolves to a gauge_t.  Each of the gauge->nd link
 * fields is set to zero on the full subset of the field's own lattice.
 *
 * Returns 0, i.e. nothing is pushed back for the Lua caller.
 */
static int
qopqdp_gauge_zero(lua_State *L)
{
  qassert(lua_gettop(L)==1);
  gauge_t *gauge = qopqdp_gauge_check(L, -1);
  int dir = 0;
  while(dir < gauge->nd) {
    QDP_M_eq_zero(gauge->links[dir], QDP_all_L(gauge->qlat));
    dir++;
  }
  return 0;
}
コード例 #2
0
/*
 * Add the HISQ fermion force to force[0..3].
 *
 * The derivative with respect to the links is first obtained from
 * QOP_hisq_deriv_multi_fnmat2_qdp() into zero-initialized scratch fields.
 * For each direction it is then contracted with the link flh->U_links[dir]
 * (link times adjoint of the derivative), reduced with
 * QDP_M_eq_antiherm_M, and added to force[dir].
 *
 * info->final_flop is incremented on top of whatever the derivative routine
 * stored there; info->final_sec is set to the wall time of this whole call.
 */
void 
QOP_hisq_force_multi_fnmat2_qdp(QOP_info_t *info,  
				QOP_FermionLinksHisq *flh,
				QDP_ColorMatrix *force[], 
				QOP_hisq_coeffs_t *hisq_coeff,
				REAL *residues,
				QDP_ColorVector *x[], 
				int *n_orders_naik)
{
#define NC QDP_get_nc(force[0])
  const double t_start = QOP_time();
  QDP_ColorMatrix *dlink[4];
  int d;

  // scratch derivative fields, cleared because the derivative routine accumulates
  d = 0;
  while(d < 4) {
    dlink[d] = QDP_create_M();
    QDP_M_eq_zero(dlink[d], QDP_all);
    d++;
  }
  QOP_hisq_deriv_multi_fnmat2_qdp(info, flh, dlink, hisq_coeff, residues, x, n_orders_naik);

  // Contract with the link only now, after all smearing levels have
  // contributed, then add the reduced result to the momentum/force.
  QDP_ColorMatrix *link_x_deriv = QDP_create_M();
  d = 0;
  while(d < 4) {
    QDP_M_eq_M_times_Ma(link_x_deriv, flh->U_links[d], dlink[d], QDP_all);
    QDP_M_eq_antiherm_M(dlink[d], link_x_deriv, QDP_all);
    QDP_M_peq_M(force[d], dlink[d], QDP_all);
    d++;
  }
  // per site and direction: 198 (multiply) + 24 (antiherm) + 18 (add)
  info->final_flop += (4.*(198+24+18))*QDP_sites_on_node;

  QDP_destroy_M(link_x_deriv);
  for(d=0; d<4; d++) QDP_destroy_M(dlink[d]);

  info->final_sec = QOP_time() - t_start;
#undef NC
}
コード例 #3
0
/*
 * Symanzik 1-loop improved gauge force.
 *
 * For every direction mu the routine accumulates, in tempmom_qdp[mu], a sum
 * of link products weighted by three coefficients taken from `coeffs`:
 *   - coeffs->plaquette      (3-link staples),
 *   - coeffs->rectangle      (rectangle terms, some of which are accumulated
 *                             into tempmom_qdp[nu] rather than tempmom_qdp[mu]),
 *   - coeffs->parallelogram  ("pgm" staples, summed into `staples` first).
 * At the end, for each mu:
 *   tmp              = fblink[mu] * adj(tempmom_qdp[mu])
 *   tempmom_qdp[mu]  = (-eps/3) * tmp + force->force[mu]
 *   force->force[mu] = antiherm(tempmom_qdp[mu])
 * (operation meanings read off the QDP routine names).
 *
 * info receives the elapsed time, a flop estimate of 96720 per site times
 * QDP_sites_on_node, and status QOP_SUCCESS.
 *
 * NOTE(review): `fblink` is not declared in this function; presumably it is
 * a file-scope array with at least 8 entries.  Entries i hold
 * gauge->links[i] (not owned), entries OPP_DIR(i) are created here.
 * NOTE(review): the set-up loop runs to QOP_common.ndim while every other
 * loop, the array sizes and the clean-up loop (indices 4..7) assume 4
 * dimensions -- confirm ndim is always 4 here.
 */
void 
QOPPC(symanzik_1loop_gauge_force1) (QOP_info_t *info, QOP_GaugeField *gauge, 
		   QOP_Force *force, QOP_gauge_coeffs_t *coeffs, REAL eps)
{
  REAL Plaq, Rect, Pgm ;
  QDP_ColorMatrix *tempmom_qdp[4];
  QDP_ColorMatrix *Amu[6]; // products of 2 links Unu(x)*Umu(x+nu)
  QDP_ColorMatrix *tmpmat;
  QDP_ColorMatrix *tmpmat1;
  QDP_ColorMatrix *tmpmat2;
  QDP_ColorMatrix *staples;
  QDP_ColorMatrix *tmpmat3;
  QDP_ColorMatrix *tmpmat4;

  int i, k;
  int mu, nu, sig;
  double dtime;
  //REAL eb3 = -eps*beta/3.0;
  REAL eb3 = -eps/3.0;
  // j[i][k]: for the i-th nu (nu != mu), the Amu slots of the two remaining
  // directions sig (sig != mu, sig != nu), in increasing order of sig
  int j[3][2] = {{1,2},
                 {0,2},
                 {0,1}};
  
  //  QOP_printf0("beta: %e, eb3: %e\n", beta, eb3);
  // start timing: dtime holds -start until QOP_time() is added at the end
  dtime = -QOP_time();

  // per-direction accumulators, start from zero
  for(mu=0; mu<4; mu++) {
    tempmom_qdp[mu] = QDP_create_M();
    QDP_M_eq_zero(tempmom_qdp[mu], QDP_all);
  }

  // forward links are borrowed from `gauge`; backward links are built as the
  // adjoint of the forward link shifted from the backward neighbor
  tmpmat = QDP_create_M();
  for(i=0; i<QOP_common.ndim; i++) {
    fblink[i] = gauge->links[i];
    fblink[OPP_DIR(i)] = QDP_create_M();
    QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all);
    QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all);
  }
  

  // Amu[0..2]: forward two-link products, Amu[3..5]: backward ones
  for(i=0; i<6; i++) {
    Amu[i] = QDP_create_M();
  }

  staples = QDP_create_M();
  tmpmat1 = QDP_create_M();
  tmpmat2 = QDP_create_M();
  tmpmat3 = QDP_create_M();
  tmpmat4 = QDP_create_M();

  Plaq = coeffs->plaquette;
  Rect = coeffs->rectangle;
  Pgm  = coeffs->parallelogram;

  //Construct 3-staples and rectangles
  for(mu=0; mu<4; mu++) {
    i=0; // counts the nu != mu visited so far (0..2)
    for(nu=0; nu<4; nu++) {
      if(nu!=mu){
	// tmpmat1 = Umu(x+nu)
	QDP_M_eq_sM(tmpmat1, fblink[mu], QDP_neighbor[nu], QDP_forward, QDP_all); 
        // Amu[i] = Unu(x)*Umu(x+nu)
        QDP_M_eq_M_times_M(Amu[i], fblink[nu], tmpmat1, QDP_all);

        //tmpmat2 = Umu(x-nu)
	QDP_M_eq_sM(tmpmat2, fblink[mu], QDP_neighbor[nu], QDP_backward, QDP_all);
        // Amu[i+3] = U_{-nu}(x)*Umu(x-nu)
        QDP_M_eq_M_times_M(Amu[i+3], fblink[OPP_DIR(nu)], tmpmat2, QDP_all);
       

 
	//tmpmat = U_{nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
        // upper 3-link staple, plaquette weight
        QDP_M_eq_M_times_Ma(staples, Amu[i], tmpmat, QDP_all);        
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Plaq, staples, QDP_all);
 
        //tmpmat = U_{-nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_Ma_times_M(tmpmat3, fblink[OPP_DIR(nu)], staples, QDP_all);
        QDP_M_eq_M_times_M(tmpmat4, tmpmat3, tmpmat, QDP_all);
        QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_forward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Rect, tmpmat, QDP_all);

        // rectangle contributions built from the upper staple that belong to
        // the nu-direction accumulator (tmpmat3 from above is reused here)
        QDP_M_eq_Ma_times_M(tmpmat4, tmpmat2, tmpmat3, QDP_all);
        QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_forward, QDP_all);
        QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[mu], QDP_backward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat3, QDP_all);

        //tmpmat = U_{-nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_Ma(tmpmat3, tmpmat2, tmpmat, QDP_all);
        QDP_M_eq_M_times_Ma(tmpmat, tmpmat3, staples, QDP_all);        
        QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[nu], QDP_forward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat3, QDP_all);




        //tmpmat = U_{-nu}(x+mu) 
        QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
        // lower 3-link staple (overwrites `staples`), plaquette weight
        QDP_M_eq_M_times_Ma(staples, Amu[i+3], tmpmat, QDP_all);        
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Plaq, staples, QDP_all);

        QDP_M_eq_Ma_times_M(tmpmat3, fblink[nu], staples, QDP_all);
        QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_M(tmpmat4, tmpmat3, tmpmat, QDP_all);
        QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_backward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Rect, tmpmat, QDP_all);

        // tmpmat1 still holds Umu(x+nu) from the top of this iteration
        QDP_M_eq_Ma_times_M(tmpmat, tmpmat3, tmpmat1, QDP_all);
        QDP_M_eq_sM(tmpmat4, tmpmat, QDP_neighbor[mu], QDP_backward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat4, QDP_all);

        QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_M(tmpmat3, staples, tmpmat, QDP_all);
        QDP_M_eq_M_times_Ma(tmpmat4, tmpmat3, tmpmat1, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat4, QDP_all);
        i++;
      }
      
    }

    // Construct the  pgm staples and add them to force
    // (uses the Amu[] products left by the loop above for this mu)
    QDP_M_eq_zero(staples, QDP_all);
    i=0;
    for(nu=0; nu<4; nu++){
      if(nu!=mu){
        k=0; // counts the sig visited so far for this nu (0..1)
	for(sig=0; sig<4;sig ++){
	  if(sig!=mu && nu!=sig){
	    
	    // the nu_sig_mu ... staple and 3 reflections
            //tmpmat = Amu["sig"](x+nu)
	    QDP_M_eq_sM(tmpmat, Amu[j[i][k]], QDP_neighbor[nu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["sig"](x+nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[nu], tmpmat, QDP_all);   
            //tmpmat3 = Unu(x+mu+sig)
            QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
	    QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_forward, QDP_all); // HERE?
            //tmpmat2 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig))
	    QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = Usig(x+mu)
	    QDP_M_eq_sM(tmpmat, fblink[sig], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig))*adj(Usig(x+mu))
	    QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);

	    QDP_M_peq_M(staples, tmpmat1, QDP_all);


            //tmpmat = Amu["sig"](x-nu)
	    QDP_M_eq_sM(tmpmat, Amu[j[i][k]], QDP_neighbor[nu], QDP_backward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["sig"](x-nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[OPP_DIR(nu)], tmpmat, QDP_all);   
            //tmpmat3 = U_{-nu}(x+mu+sig)
            QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
	    QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_forward, QDP_all); // HERE?
            //tmpmat2 = U_{-nu}(x)*Amu["sig"](x-nu)*adj(U_{-nu}(x+mu+sig))
	    QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = Usig(x+mu)
	    QDP_M_eq_sM(tmpmat, fblink[sig], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["sig"](x-nu)*adj(U_{-nu}(x+mu+sig))*adj(Usig(x+mu))
	    QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);

	    QDP_M_peq_M(staples, tmpmat1, QDP_all);


            //tmpmat = Amu["-sig"](x-nu)
	    QDP_M_eq_sM(tmpmat, Amu[j[i][k]+3], QDP_neighbor[nu], QDP_backward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["-sig"](x-nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[OPP_DIR(nu)], tmpmat, QDP_all);   
            //tmpmat3 = U_{-nu}(x+mu-sig)
            QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
	    QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_backward, QDP_all); // HERE?
            //tmpmat2 = U_{-nu}(x)*Amu["-sig"](x-nu)*adj(U_{-nu}(x+mu-sig))
	    QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = U_{-sig}(x+mu)
	    QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(sig)], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["-sig"](x-nu)*adj(U_{-nu}(x+mu-sig))*adj(U_{-sig}(x+mu))
	    QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);

	    QDP_M_peq_M(staples, tmpmat1, QDP_all);

            


            //tmpmat = Amu["-sig"](x+nu)
	    QDP_M_eq_sM(tmpmat, Amu[j[i][k]+3], QDP_neighbor[nu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["-sig"](x+nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[nu], tmpmat, QDP_all);   
            //tmpmat3 = Unu(x+mu-sig)
            QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
	    QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_backward, QDP_all); // HERE?
            //tmpmat2 = Unu(x)*Amu["-sig"](x+nu)*adj(Unu(x+mu-sig))
	    QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = U_{-sig}(x+mu)
	    QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(sig)], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["-sig"](x+nu)*adj(Unu(x+mu-sig))*adj(U_{-sig}(x+mu))
	    QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);

	    QDP_M_peq_M(staples, tmpmat1, QDP_all);

	    k++;
	  }//close if sig!=nu ...
	}//close sig loop
	i++;
      }// close if nu!=mu
    }//close the pgm nu loop

    // all parallelogram staples for this mu share one weight
    QDP_M_peq_r_times_M(tempmom_qdp[mu], &Pgm, staples, QDP_all);
   

    
  }// closes the mu loop

#ifdef CHKSUM
  // optional debugging checksum: sum over mu of det(global sum of force[mu])
  QLA_ColorMatrix qcm;
  QLA_Complex det, chk;
  QLA_c_eq_r(chk, 0);
#endif
  // contract with the link, scale by -eps/3, add the incoming force and keep
  // the result of QDP_M_eq_antiherm_M as the new force
  for(mu=0; mu<4; mu++){
    QDP_M_eq_M_times_Ma(tmpmat, fblink[mu], tempmom_qdp[mu], QDP_all); // HERE?
    QDP_M_eq_r_times_M_plus_M( tempmom_qdp[mu], &eb3, tmpmat, force->force[mu], QDP_all);// HERE?
    QDP_M_eq_antiherm_M(force->force[mu], tempmom_qdp[mu], QDP_all);// HERE
#ifdef CHKSUM
    QDP_m_eq_sum_M(&qcm, force->force[mu], QDP_all);
    QLA_C_eq_det_M(&det, &qcm);
    QLA_c_peq_c(chk, det);
#endif
  }
#ifdef CHKSUM
  QOP_printf0("chksum: %g %g\n", QLA_real(chk), QLA_imag(chk));
#endif

  //DESTROY various fields

  QDP_destroy_M(tmpmat);
  QDP_destroy_M(tmpmat1);
  QDP_destroy_M(tmpmat2);
  QDP_destroy_M(tmpmat3);
  QDP_destroy_M(staples);
  QDP_destroy_M(tmpmat4);

  for(mu=0; mu<4; mu++){
    QDP_destroy_M(tempmom_qdp[mu]);
  }
  for(i=0; i<6; i++) {
    QDP_destroy_M(Amu[i]);
  }

  // only the backward links were created here; fblink[0..3] alias gauge->links
  // NOTE(review): assumes OPP_DIR maps 0..3 onto 4..7 -- confirm
  for(i=4; i<8; i++) {
    QDP_destroy_M(fblink[i]);
  }

  dtime += QOP_time();

  // fixed per-site flop estimate for the whole routine
  double nflop = 96720;
  info->final_sec = dtime;
  info->final_flop = nflop*QDP_sites_on_node; 
  info->status = QOP_SUCCESS;
  //QOP_printf0("Time in slow g_force: %e\n", info->final_sec);
} 
コード例 #4
0
/*
 * Helper for smearing level 0: handles one direction.
 *
 * Builds  sum_term residues[term] * x[term] * adj(shifted x[term])  in
 * oprod_sum, where every x[term] is shifted with fnshift(shift_dir) /
 * fndir(shift_dir), passes the sum through link_gather_connection_qdp() in
 * direction gather_dir, and adds the gathered field to `accum`.
 *
 * oprod_sum, oprod_gathered, tmat, tsrc[2] and vec_tmp[2] are caller-owned
 * scratch fields whose contents are overwritten.  Requires nterms >= 1.
 *
 * Returns the per-site flop count of the work done.
 */
static size_t
hisq_smearing0_accumulate_dir(REAL *residues, QDP_ColorVector *x[], int nterms,
			      QDP_ColorMatrix *accum,
			      int shift_dir, int gather_dir,
			      QDP_ColorMatrix *oprod_sum,
			      QDP_ColorMatrix *oprod_gathered,
			      QDP_ColorMatrix *tmat,
			      QDP_ColorVector *tsrc[2],
			      QDP_ColorVector *vec_tmp[2])
{
  size_t nflops = 0;
  REAL coeff = 1.; // any overall factor is applied by the calling wrapper
  int term;
  int k = 0; // which tsrc/vec_tmp pair is current (0 or 1)

  QDP_V_eq_V(tsrc[k], x[0], QDP_all);
  QDP_V_eq_sV(vec_tmp[k], tsrc[k], fnshift(shift_dir), fndir(shift_dir),
	      QDP_all);
  QDP_M_eq_zero(oprod_sum, QDP_all);

  for(term=0; term<nterms; term++) {
    if(term<nterms-1) {
      // prepare the shifted vector of the next term in the other buffer pair
      QDP_V_eq_V(tsrc[1-k], x[term+1], QDP_all);
      QDP_V_eq_sV(vec_tmp[1-k], tsrc[1-k], fnshift(shift_dir),
		  fndir(shift_dir), QDP_all);
    }
    QDP_M_eq_V_times_Va(tmat, tsrc[k], vec_tmp[k], QDP_all);
    nflops += 54;
    QDP_discard_V(vec_tmp[k]);
    QDP_M_peq_r_times_M(oprod_sum, &residues[term], tmat, QDP_all);
    nflops += 36;

    k = 1-k; // swap 0 and 1
  } // end loop over terms in rational function expansion

  link_gather_connection_qdp(oprod_gathered, oprod_sum, tmat, gather_dir);
  QDP_M_peq_r_times_M(accum, &coeff, oprod_gathered, QDP_all);
  nflops += 36;

  return nflops;
}

/*
 * Smearing level 0.
 *
 * Computes the residue-weighted outer products of the vectors x[0..nterms-1]
 * with their shifted copies, once for the one-link shift (result in
 * force_accum[XUP..TUP]) and once for the three-link "Naik" shift (result in
 * force_accum_naik[XUP..TUP]).  Both accumulator sets are overwritten.
 *
 * info->final_flop is set to the flop count of this call.
 *
 * Behavior for nterms <= 0: both accumulator sets are zeroed and
 * info->final_flop is set to 0.  (Previously the routine returned without
 * touching either, so the caller summed a stale flop count and went on to
 * read accumulators holding leftover or uninitialized data.)
 */
static void 
QOP_hisq_force_multi_smearing0_fnmat(QOP_info_t *info,  
				     REAL *residues,
				     QDP_ColorVector *x[], 
				     int nterms, 
				     QDP_ColorMatrix *force_accum[4],
				     QDP_ColorMatrix *force_accum_naik[4])
{
  int dir;
  size_t nflops = 0;

  QDP_ColorMatrix *tmat;
  QDP_ColorMatrix *oprod_sum;      // outer-product sum at the path end
  QDP_ColorMatrix *oprod_gathered; // the same, after the link gather
  QDP_ColorVector *tsrc[2], *vec_tmp[2];

  // clear force accumulators (also on the empty-input path below)
  for(dir=XUP;dir<=TUP;dir++){
    QDP_M_eq_zero(force_accum[dir], QDP_all);
    QDP_M_eq_zero(force_accum_naik[dir], QDP_all);
  }

  if( nterms<=0 ){
    info->final_flop = 0.;
    return;
  }

  // only the scratch fields that are actually used are allocated
  tmat           = QDP_create_M();
  oprod_sum      = QDP_create_M();
  oprod_gathered = QDP_create_M();
  tsrc[0] = QDP_create_V();
  tsrc[1] = QDP_create_V();
  vec_tmp[0] = QDP_create_V();
  vec_tmp[1] = QDP_create_V();

  // *** one-link part *** : loop on directions, path table is not needed
  for(dir=XUP;dir<=TUP;dir++){
    nflops += hisq_smearing0_accumulate_dir(residues, x, nterms,
					    force_accum[dir],
					    OPP_DIR(dir), dir,
					    oprod_sum, oprod_gathered, tmat,
					    tsrc, vec_tmp);
  }

  // *** Naik part *** : same construction with the three-link directions
  for(dir=XUP;dir<=TUP;dir++){
    nflops += hisq_smearing0_accumulate_dir(residues, x, nterms,
					    force_accum_naik[dir],
					    OPP_3_DIR( DIR3(dir) ), DIR3(dir),
					    oprod_sum, oprod_gathered, tmat,
					    tsrc, vec_tmp);
  }

  QDP_destroy_V( tsrc[0] );
  QDP_destroy_V( tsrc[1] );
  QDP_destroy_V( vec_tmp[0] );
  QDP_destroy_V( vec_tmp[1] );
  QDP_destroy_M( tmat );
  QDP_destroy_M( oprod_sum );
  QDP_destroy_M( oprod_gathered );

  info->final_flop = ((double)nflops)*QDP_sites_on_node;
  return;
} //hisq_force_multi_smearing0_fnmat
コード例 #5
0
/*
 * HISQ fermion force, "fnmat" variant: driver that chains the smearing
 * levels and adds the result to Force->force[0..3].
 *
 * Steps, as visible in the body:
 *   1. On the first call only, build the path tables
 *      (QOP_make_paths_and_dirs_hisq), allocate three sorted copies plus
 *      their net-back-direction tables, and fill them.  These live in
 *      function-static pointers and are never freed here.
 *   2. For every Naik mass index inaik: run smearing level 0 on the matching
 *      slice of residues/x, then a path smearing over Wgf with path table 2
 *      (inaik==0) or 3 (inaik>0), and add the result, scaled by 1 or
 *      eps_naik[inaik], to force_accum_2.
 *   3. Depending on hisq_coeff->umethod: either smear force_accum_2 directly
 *      with path table 1 over Ugf, or call QOP_hisq_force_multi_reunit first
 *      and smear force_accum_1u.  Any other method prints a message and
 *      exits with status 1.
 *   4. Multiply by Ugf[dir], scale by +2 on even and -2 on odd sites, apply
 *      QDP_M_eq_antiherm_M and add to force[dir].
 *
 * info receives elapsed time, accumulated flops and QOP_SUCCESS.
 *
 * NOTE(review): the malloc results are not checked.
 */
void 
QOP_hisq_force_multi_wrapper_fnmat(QOP_info_t *info,  
				   QOP_FermionLinksHisq *flh,
				   QOP_Force *Force, 
				   QOP_hisq_coeffs_t *hisq_coeff,
				   REAL *residues,
				   QDP_ColorVector *x[], 
				   int *n_orders_naik)
  
{
  double dtime = QDP_time();
  int i, ipath, dir;
  REAL coeff_mult;

  double *eps_naik = hisq_coeff->eps_naik;
  int n_naiks = hisq_coeff->n_naiks;
  QOP_hisq_unitarize_method_t umethod = hisq_coeff->umethod;

  // Quark paths sorted by net displacement and last directions
  // (static: built on the first call and reused afterwards)
  static Q_path *q_paths_sorted_1 = NULL;
  static Q_path *q_paths_sorted_2 = NULL;
  static Q_path *q_paths_sorted_3 = NULL;

  static int *netbackdir_table_1 = NULL;
  static int *netbackdir_table_2 = NULL;
  static int *netbackdir_table_3 = NULL;

  static int first_force = 1;

  if(first_force == 1) 
    QOP_make_paths_and_dirs_hisq(hisq_coeff, umethod);

  int num_q_paths_1 = qop_get_num_q_paths_1();
  int num_q_paths_2 = qop_get_num_q_paths_2();
  int num_q_paths_3 = qop_get_num_q_paths_3();

  Q_path *q_paths_1 = qop_get_q_paths_1();
  Q_path *q_paths_2 = qop_get_q_paths_2();
  Q_path *q_paths_3 = qop_get_q_paths_3();

  Q_path *q_paths_sorted_current = NULL;
  int *netbackdir_table_current = NULL;

  int inaik;
  int n_naik_shift;
  double final_flop = 0.; // total flops, summed over all sub-calls
  size_t nflops = 0;      // per-site flops of the work done directly here

  QDP_ColorMatrix * force[4] =  {Force->force[0], Force->force[1], 
				 Force->force[2], Force->force[3]};

  int num_q_paths_current,n_orders_naik_current;//==nterms


  QDP_ColorMatrix *force_accum_0[4];
  QDP_ColorMatrix *force_accum_0_naik[4];
  QDP_ColorMatrix *force_accum_1[4];
  QDP_ColorMatrix *force_accum_1u[4];
  QDP_ColorMatrix *force_accum_2[4];
  QDP_ColorMatrix *force_final[4];


  QDP_ColorMatrix *Ugf[4], *Vgf[4], *Wgf[4];

  int nterms = 0, n_order_naik_total;

  // total number of terms over all Naik masses
  for(inaik = 0; inaik < n_naiks; inaik++)
    nterms += n_orders_naik[inaik];
  n_order_naik_total = nterms;

  // local aliases for the three link sets stored in flh
  for(i=0;i<4;i++) {
    Ugf[i] = flh->U_links[i];
    Vgf[i] = flh->V_links[i];
    Wgf[i] = flh->W_unitlinks[i];
  }

  QDP_ColorMatrix *tmat;
  QDP_ColorMatrix *mat_tmp0;

  REAL treal;

  if( first_force==1 ){
    if( q_paths_sorted_1==NULL ) 
      q_paths_sorted_1 = (Q_path *)malloc( num_q_paths_1*sizeof(Q_path) );
    if(netbackdir_table_1==NULL ) 
      netbackdir_table_1 = (int *)malloc( num_q_paths_1*sizeof(int) );
    if( q_paths_sorted_2==NULL ) 
      q_paths_sorted_2 = (Q_path *)malloc( num_q_paths_2*sizeof(Q_path) );
    if(netbackdir_table_2==NULL ) 
      netbackdir_table_2 = (int *)malloc( num_q_paths_2*sizeof(int) );
    if( q_paths_sorted_3==NULL ) 
      q_paths_sorted_3 = (Q_path *)malloc( num_q_paths_3*sizeof(Q_path) );
    if(netbackdir_table_3==NULL ) 
      netbackdir_table_3 = (int *)malloc( num_q_paths_3*sizeof(int) );
    // NOTE(review): this else pairs only with the netbackdir_table_3 test
    // directly above, not with the other five allocations; it also
    // terminates the program with exit status 0 after printing a "WARNING".
    else{QOP_printf0("WARNING: remaking sorted path tables\n"); exit(0); }
    // make sorted tables
    sort_quark_paths_hisq( q_paths_1, q_paths_sorted_1, num_q_paths_1, 8 );

    for( ipath=0; ipath<num_q_paths_1; ipath++ )
      netbackdir_table_1[ipath] = 
	find_backwards_gather( &(q_paths_sorted_1[ipath]) );

    sort_quark_paths_hisq( q_paths_2, q_paths_sorted_2, num_q_paths_2, 16 );

    for( ipath=0; ipath<num_q_paths_2; ipath++ )
      netbackdir_table_2[ipath] = 
	find_backwards_gather( &(q_paths_sorted_2[ipath]) );

    sort_quark_paths_hisq( q_paths_3, q_paths_sorted_3, num_q_paths_3, 16 );

    for( ipath=0; ipath<num_q_paths_3; ipath++ )
      netbackdir_table_3[ipath] = 
	find_backwards_gather( &(q_paths_sorted_3[ipath]) );

    first_force=0;
  }

  tmat = QDP_create_M();
  mat_tmp0 = QDP_create_M();

  for(i=XUP;i<=TUP;i++){
     force_accum_0[i] = QDP_create_M();
     force_accum_0_naik[i] = QDP_create_M();
     force_accum_1[i] = QDP_create_M();
     force_accum_1u[i] = QDP_create_M();
     force_accum_2[i] = QDP_create_M();
     force_final[i] = QDP_create_M();
  }


  // force_accum_2 collects the contributions of all Naik masses
  for(dir=XUP;dir<=TUP;dir++)
    QDP_M_eq_zero(force_accum_2[dir], QDP_all);


  // loop on different naik masses
  // n_naik_shift is the offset of the current mass's terms in residues[]/x[]
  n_naik_shift = 0;


  for( inaik=0; inaik<n_naiks; inaik++ ) {

    // smearing level 0
    // the first pass runs over ALL terms, later passes only over their own
    if( 0==inaik ) {
      n_orders_naik_current = n_order_naik_total;
    }
    else {
      n_orders_naik_current = n_orders_naik[inaik];
    }
    

    QOP_hisq_force_multi_smearing0_fnmat(info,residues+n_naik_shift, 
					 x+n_naik_shift, n_orders_naik_current,
					 force_accum_0, force_accum_0_naik);
    final_flop += info->final_flop;
 
    
    // smearing level 2
    // path table 2 for the first Naik index, table 3 for the others
    if( 0==inaik ) {
      q_paths_sorted_current = q_paths_sorted_2;
      num_q_paths_current = num_q_paths_2;
      netbackdir_table_current = netbackdir_table_2;
    }
    else {
      q_paths_sorted_current = q_paths_sorted_3;
      num_q_paths_current = num_q_paths_3;
      netbackdir_table_current = netbackdir_table_3;
    }
    
    QOP_hisq_force_multi_smearing_fnmat( info,Wgf,residues+n_naik_shift, 
					 x+n_naik_shift, 
					 n_orders_naik_current, 
					 force_accum_1, 
					 force_accum_0, force_accum_0_naik, 
					 num_q_paths_current, 
					 q_paths_sorted_current, 
					 netbackdir_table_current );
    //QOP_printf0("HISQ smear0 flops = %g\n", info->final_flop);
    final_flop += info->final_flop;

    // weight of this Naik mass in the sum: 1 for the first, eps_naik otherwise
    if( 0==inaik ) {
      coeff_mult = 1.0;
    }
    else {
      coeff_mult = eps_naik[inaik];
    }
    
    
    for(dir=XUP;dir<=TUP;dir++) {
      QDP_M_peq_r_times_M(force_accum_2[dir],&coeff_mult,
			  force_accum_1[dir],QDP_all);
      nflops += 36;
    }
    n_naik_shift += n_orders_naik[inaik];


  }

 

  if ( umethod==QOP_UNITARIZE_NONE ){

    // smearing level 1
    // no Naik accumulator is passed, so path table 1 must not contain Naik
    // paths (the smearing routine exits if it finds one)
    QOP_hisq_force_multi_smearing_fnmat( info,Ugf,residues, 
					 x, 
					 nterms, force_accum_1, 
					 force_accum_2, NULL, 
					 num_q_paths_1, 
					 q_paths_sorted_1, 
					 netbackdir_table_1 );
    final_flop += info->final_flop;
    
  }
  else if ( umethod==QOP_UNITARIZE_RATIONAL ){

    
    // reunitarization
    // presumably writes force_accum_1u from force_accum_2 (force_accum_1u is
    // what the next call consumes) -- confirm against the callee
    QOP_hisq_force_multi_reunit(info,Vgf,force_accum_1u,
				force_accum_2);
    //QOP_printf0("reunit flops = %g\n", info->final_flop);
    final_flop += info->final_flop;
    
    // smearing level 1
    QOP_hisq_force_multi_smearing_fnmat( info,Ugf,residues, 
					 x, 
					 nterms, force_accum_1, 
					 force_accum_1u, NULL, 
					 num_q_paths_1, 
					 q_paths_sorted_1, 
					 netbackdir_table_1 );
    //QOP_printf0("HISQ smear1 flops = %g\n", info->final_flop);
    final_flop += info->final_flop;
  }
  else
    {
      QOP_printf0("Unknown or unsupported unitarization method\n");
      exit(1);
      
    }


  // contraction with the link in question should be done here,
  // after contributions from all levels of smearing are taken into account

  for(dir=XUP;dir<=TUP;dir++){

    QDP_M_eq_M_times_M(force_final[dir],Ugf[dir],force_accum_1[dir],QDP_all);
    nflops += 198;

  }



  // take into account even/odd parity (it is NOT done in "smearing" routine)
  //eps multiplication done outside QOP 

  for(dir=XUP;dir<=TUP;dir++){
    // copy first so that the even and odd halves can be rescaled in place
    QDP_M_eq_M(tmat,force_final[dir],QDP_all);

    treal = 2.0;
    QDP_M_eq_r_times_M(force_final[dir],&treal,tmat,QDP_even);

    treal = -2.0;
    QDP_M_eq_r_times_M(force_final[dir],&treal,tmat,QDP_odd);
    nflops += 18;

  }


  // Put antihermitian traceless part into momentum 
  // add force to momentum

  for(dir=XUP; dir<=TUP; dir++){

    QDP_M_eq_antiherm_M(mat_tmp0, force_final[dir], QDP_all);
    QDP_M_peq_M(force[dir], mat_tmp0, QDP_all);
    nflops += 24+18;
    //QDP_M_peq_M(force_final[dir], force[dir], QDP_all);
    //QDP_M_eq_antiherm_M(force[dir], force_final[dir], QDP_all);

  }



  // release all per-call scratch fields (the static path tables are kept)
  for(i=XUP;i<=TUP;i++){
     QDP_destroy_M( force_accum_0[i] );
     QDP_destroy_M( force_accum_0_naik[i] );
     QDP_destroy_M( force_accum_1[i] );
     QDP_destroy_M( force_accum_1u[i] );
     QDP_destroy_M( force_accum_2[i] );
     QDP_destroy_M( force_final[i] );
  }

  QDP_destroy_M( tmat );
  QDP_destroy_M( mat_tmp0 );

  final_flop += ((double)nflops)*QDP_sites_on_node;

  info->final_sec = QDP_time() - dtime;
  info->final_flop = final_flop;
  info->status = QOP_SUCCESS;
  //QOP_printf0("HISQ force flops = %g\n", info->final_flop);
} //hisq_force_multi_wrapper_fnmat
コード例 #6
0
/* Smearing level i*/
/*
 * Propagates a force through one level of link smearing by walking a table
 * of quark paths.
 *
 * Parameters:
 *   info                       only info->final_flop is written
 *   gf[4]                      links used for the transports along the paths
 *   residues, x, nterms        not referenced in the body of this routine
 *   force_accum[4]             output; zeroed, then every path adds to it
 *   force_accum_old[4]         input force for ordinary paths
 *   force_accum_naik_old[4]    input force for Naik paths (netbackdir >= 8);
 *                              may be NULL only if the table has no such
 *                              path, otherwise the routine prints a message
 *                              and exits
 *   internal_num_q_paths       number of entries in the two tables below
 *   internal_q_paths_sorted    the paths; entries with forwback == -1 are
 *                              skipped
 *   internal_netbackdir_table  net backward direction for each path
 *
 * NOTE(review): vec_tmp[] is created and destroyed but never used here.
 * NOTE(review): the Naik mismatch error path calls exit(0), i.e. it reports
 * success to the shell.
 */
static void 
QOP_hisq_force_multi_smearing_fnmat(QOP_info_t *info, 
				    QDP_ColorMatrix * gf[4],
				    REAL *residues,
				    QDP_ColorVector *x[], 
				    int nterms, 
				    QDP_ColorMatrix *force_accum[4],
				    QDP_ColorMatrix *force_accum_old[4],
				    QDP_ColorMatrix *force_accum_naik_old[4],
				    int internal_num_q_paths,
				    Q_path *internal_q_paths_sorted,
				    int *internal_netbackdir_table)
{
  // lastdir is only read for ilink>0, i.e. after it was set for this path
  int i,j,k,lastdir=-99,ipath,ilink;
  int length,dir,odir;
  REAL coeff;

  QDP_ColorMatrix *tmat;
  QDP_ColorMatrix *oprod_along_path[MAX_PATH_LENGTH+1];
  QDP_ColorMatrix *mats_along_path[MAX_PATH_LENGTH+1];
  QDP_ColorMatrix *mat_tmp0,*mat_tmp1, *stmp[8];;
  QDP_ColorVector *vec_tmp[2];

  int netbackdir;
  size_t nflops = 0; // per-site flop count

// table of net path displacements (backwards from usual convention)

  Q_path *this_path;	// pointer to current path

  /* Allocate fields */
  for(i=0;i<=MAX_PATH_LENGTH;i++){
    oprod_along_path[i] = QDP_create_M();
  }
  for(i=1;i<=MAX_PATH_LENGTH;i++){ 
    // 0 element is never used (it's unit matrix)
    mats_along_path[i] = QDP_create_M();
  }

  mat_tmp0   = QDP_create_M();
  mat_tmp1   = QDP_create_M();
  for(i=0; i<8; i++) stmp[i] = QDP_create_M();
  tmat       = QDP_create_M();
  vec_tmp[0] = QDP_create_V();
  vec_tmp[1] = QDP_create_V();
 
  // clear force accumulators
  for(dir=XUP;dir<=TUP;dir++)
    QDP_M_eq_zero(force_accum[dir], QDP_all);

  // loop over paths, and loop over links in path 
  for( ipath=0; ipath<internal_num_q_paths; ipath++ ){
    this_path = &(internal_q_paths_sorted[ipath]); 
    if(this_path->forwback== -1)continue;	// skip backwards dslash 
    length = this_path->length;
    netbackdir = internal_netbackdir_table[ipath];

    // move f(i-1) force from current site in positive direction,
    //  this corresponds to outer product |X><Y| calculated at the endpoint of the path 
    if( netbackdir<8) { // Not a Naik path
      link_gather_connection_qdp(oprod_along_path[0] , 
				 force_accum_old[OPP_DIR(netbackdir)],
				 tmat, netbackdir );
    }
    else { // Naik path
      if( NULL==force_accum_naik_old ) {
        QOP_printf0( "hisq_force_multi_smearing_fnmat:  mismatch:\n" );
        QOP_printf0( "force_accum_naik_old is NULL, but path table contains Naik paths(!)\n" );
        exit(0);
      }
      // CONVERSION FROM 3-LINK DIRECTION TO 1-LINK DIRECTION
      // (the accumulator is indexed with netbackdir-8, the gather itself
      // still uses the 3-link direction netbackdir)
      link_gather_connection_qdp(oprod_along_path[0] , 
				 force_accum_naik_old[OPP_DIR(netbackdir-8)],
				 tmat, netbackdir );
    }

    // figure out how much of the outer products along the path must be
    // recomputed. j is last one needing recomputation. k is first one.
    j=length-1; // default is recompute all
    if( GOES_BACKWARDS(this_path->dir[0]) ) k=1; else k=0;

    // transport the outer product link by link, from the path end backwards
    for(ilink=j;ilink>=k;ilink--){
      link_transport_connection_qdp( oprod_along_path[length-ilink], 
				     oprod_along_path[length-ilink-1], gf,
				     mat_tmp0, stmp, this_path->dir[ilink]  );
      nflops += 198;
    }

    // maintain an array of transports "to this point" along the path.
    //	Don't recompute beginning parts of path if same as last path 
    ilink=0; // first link where new transport is needed
    // Sometimes we don't need the matrix for the last link
    if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length;

    for( ; ilink<k; ilink++ ){
      if( ilink==0 ){
        // first link: mats_along_path[1] comes straight from the gauge field
        dir = this_path->dir[0];
	if( GOES_FORWARDS(dir) ){
	  QDP_M_eq_sM(tmat, gf[dir], QDP_neighbor[dir],
		      QDP_backward, QDP_all);
	  QDP_M_eq_Ma(mats_along_path[1], tmat, QDP_all);
	  QDP_discard_M(tmat);
	}
	else{
	  QDP_M_eq_M(mats_along_path[1], gf[OPP_DIR(dir)], QDP_all);
	}
      }
      else { // ilink != 0
        dir = OPP_DIR(this_path->dir[ilink]);

	link_transport_connection_qdp( mats_along_path[ilink+1], 
				       mats_along_path[ilink], gf,
				       mat_tmp0, stmp, dir );
	nflops += 198;
      }
    } // end loop over links

    // A path has (length+1) points, counting the ends.  At first
    //	 point, no "down" direction links have their momenta "at this
    //	 point". At last, no "up" ... 
    if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length;
    for( ilink=0; ilink<=k; ilink++ ){
      if(ilink<length)dir = this_path->dir[ilink];
      else dir=NODIR;
      // the path coefficient changes sign at every odd point along the path
      coeff = this_path->coeff;
      if( (ilink%2)==1 )coeff = -coeff;
      // add in contribution to the force 
      if( ilink<length && GOES_FORWARDS(dir) ){
	link_gather_connection_qdp(mat_tmp1, 
		       oprod_along_path[length-ilink-1], tmat, dir );
        if(ilink==0) 
	  {
	    QDP_M_eq_M(mat_tmp0,mat_tmp1,QDP_all);
	  }
        else
	  {
	    QDP_M_eq_M_times_Ma(mat_tmp0, mats_along_path[ilink], 
				mat_tmp1, QDP_all);
	    nflops += 198;
	    QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all);
	  }
	QDP_M_peq_r_times_M(force_accum[dir],&coeff,mat_tmp1,QDP_all);
	nflops += 36;
      }
      // the link just traversed (lastdir) went backwards: its contribution
      // goes to the accumulator of the opposite, forward direction
      if( ilink>0 && GOES_BACKWARDS(lastdir) ){
	odir = OPP_DIR(lastdir);
        if( ilink==1 ){
	  QDP_M_eq_M(mat_tmp0,oprod_along_path[length-ilink],QDP_all);
	  QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all);
	}
        else{
	  link_gather_connection_qdp(mat_tmp1, mats_along_path[ilink-1], 
				     tmat, odir );
	  QDP_M_eq_M_times_Ma(mat_tmp0, oprod_along_path[length-ilink], 
			      mat_tmp1, QDP_all);
	  nflops += 198;
	  QDP_M_eq_Ma(mat_tmp1, mat_tmp0, QDP_all);
        }
	QDP_M_peq_r_times_M(force_accum[odir],&coeff,mat_tmp1,QDP_all);
	nflops += 36;
      }
      lastdir = dir;
    } // end loop over links in path //
  } // end loop over paths //

  QDP_destroy_V( vec_tmp[0] );
  QDP_destroy_V( vec_tmp[1] );
  QDP_destroy_M( mat_tmp0 );
  QDP_destroy_M( mat_tmp1 );
  QDP_destroy_M( tmat );
  for(i=0; i<8; i++) QDP_destroy_M(stmp[i]);
  for(i=0;i<=MAX_PATH_LENGTH;i++){
    QDP_destroy_M( oprod_along_path[i] );
  }
  for(i=1;i<=MAX_PATH_LENGTH;i++){
    QDP_destroy_M( mats_along_path[i] );
  }

  info->final_flop = ((double)nflops)*QDP_sites_on_node;

  return;
}//hisq_force_multi_smearing_fnmat
コード例 #7
0
/*
 * Derivative of the HISQ fermion action with respect to the U links.
 *
 * The result is ADDED to deriv[0..3] (the caller must initialize them).
 *
 * Steps:
 *   1. For every Naik mass index, QOP_get_mid() builds the one-link and
 *      three-link outer products from the matching slice of residues/x;
 *      their sign is flipped on odd sites.
 *   2. QOP_asqtad_deriv() over the W links propagates them through the
 *      outer smearing: full asqtad coefficients for the first index, only
 *      the one-link/Naik "difference" coefficients for the others.  The
 *      results are summed into force_accum_2 with weight 1 or
 *      eps_naik[inaik].
 *   3. Depending on hisq_coeff->umethod the sum is passed through the
 *      reunitarization derivative (or not), then through a second
 *      QOP_asqtad_deriv() over the U links with the fat7 coefficients.
 *      An unknown method prints a message and exits with status 1.
 *   4. Twice the result is added to deriv[].
 *
 * info receives elapsed time, accumulated flops and QOP_SUCCESS.
 *
 * Compared with the previous version, the five lattice fields that were
 * created and destroyed without ever being used (tmat, force_final[4]) are
 * gone, and the duplicated level-0 QOP_asqtad_deriv() call is merged.
 */
void 
QOP_hisq_deriv_multi_fnmat2_qdp(QOP_info_t *info,  
				QOP_FermionLinksHisq *flh,
				QDP_ColorMatrix *deriv[],
				QOP_hisq_coeffs_t *hisq_coeff,
				REAL *residues,
				QDP_ColorVector *x[], 
				int *n_orders_naik)
{
#define NC QDP_get_nc(deriv[0])
  if(!QOP_asqtad.inited) QOP_asqtad_invert_init();

  double dtime = QDP_time();
  double totalflops = 0;  // flops reported by the sub-calls
  int siteflops = 0;      // per-site flops of the work done directly here
  QOP_info_t tinfo;       // scratch info for the sub-calls

  QDP_ColorMatrix *Ugf[4], *Vgf[4], *Wgf[4];
  for(int i=0; i<4; i++) {
    Ugf[i] = flh->U_links[i];
    Vgf[i] = flh->V_links[i];
    Wgf[i] = flh->W_unitlinks[i];
  }

  QDP_ColorMatrix *force_accum_0[4];
  QDP_ColorMatrix *force_accum_0_naik[4];
  QDP_ColorMatrix *force_accum_1[4];
  QDP_ColorMatrix *force_accum_1u[4];
  QDP_ColorMatrix *force_accum_2[4];
  for(int i=0; i<4; i++) {
     force_accum_0[i] = QDP_create_M();
     force_accum_0_naik[i] = QDP_create_M();
     force_accum_1[i] = QDP_create_M();
     force_accum_1u[i] = QDP_create_M();
     force_accum_2[i] = QDP_create_M();
     // force_accum_2 collects the contributions of all Naik masses
     QDP_M_eq_zero(force_accum_2[i], QDP_all);
  }

  int n_naiks = hisq_coeff->n_naiks;
  int nterms = 0;
  for(int inaik = 0; inaik < n_naiks; inaik++)
    nterms += n_orders_naik[inaik];

  // loop on different naik masses
  // n_naik_shift is the offset of the current mass's terms in residues[]/x[]
  int n_naik_shift = 0;
  for(int inaik=0; inaik<n_naiks; inaik++) {
    // the first pass runs over ALL terms, later passes only over their own
    int n_orders_naik_current;
    if( inaik==0 ) {
      n_orders_naik_current = nterms;
    } else {
      n_orders_naik_current = n_orders_naik[inaik];
    }

    QOP_get_mid(&tinfo, force_accum_0, QDP_neighbor, 4, residues+n_naik_shift,
		1, x+n_naik_shift, n_orders_naik_current);
    totalflops += tinfo.final_flop;
    QOP_get_mid(&tinfo, force_accum_0_naik, QOP_common.neighbor3, 4,
		residues+n_naik_shift, 1, x+n_naik_shift,
		n_orders_naik_current);
    totalflops += tinfo.final_flop;
    // compensate for -1 on odd sites here instead of at end
    for(int dir=0; dir<4; dir++) {
      QDP_M_eqm_M(force_accum_0[dir], force_accum_0[dir], QDP_odd);
      QDP_M_eqm_M(force_accum_0_naik[dir], force_accum_0_naik[dir], QDP_odd);
    }

    // smearing level 0
    for(int i=0; i<4; i++) QDP_M_eq_zero(force_accum_1[i], QDP_all);
    QOP_asqtad_coeffs_t acoef;
    if(inaik==0) {
      // first index: the full asqtad-like outer smearing
      acoef.one_link = hisq_coeff->asqtad_one_link;
      acoef.three_staple = hisq_coeff->asqtad_three_staple;
      acoef.five_staple = hisq_coeff->asqtad_five_staple;
      acoef.seven_staple = hisq_coeff->asqtad_seven_staple;
      acoef.lepage = hisq_coeff->asqtad_lepage;
      acoef.naik = hisq_coeff->asqtad_naik;
    } else {
      // other indices: only the one-link and Naik difference terms
      acoef.one_link = hisq_coeff->difference_one_link;
      acoef.three_staple = 0;
      acoef.five_staple = 0;
      acoef.seven_staple = 0;
      acoef.lepage = 0;
      acoef.naik = hisq_coeff->difference_naik;
    }
    QOP_asqtad_deriv(&tinfo, Wgf, force_accum_1, &acoef,
		     force_accum_0, force_accum_0_naik);
    //QOP_printf0("HISQ smear0 flops = %g\n", tinfo.final_flop);
    totalflops += tinfo.final_flop;

    // weight of this Naik mass in the sum: 1 for the first, eps_naik otherwise
    QLA_Real coeff_mult;
    if( inaik==0 ) {
      coeff_mult = 1.0;
    } else {
      coeff_mult = hisq_coeff->eps_naik[inaik];
    }
    for(int dir=0; dir<4; dir++) {
      QDP_M_peq_r_times_M(force_accum_2[dir], &coeff_mult,
			  force_accum_1[dir], QDP_all);
    }
    siteflops += 4*36;

    n_naik_shift += n_orders_naik[inaik];
  }

  // smearing level 1
  QOP_asqtad_coeffs_t acoef;
  acoef.one_link = hisq_coeff->fat7_one_link;
  acoef.three_staple = hisq_coeff->fat7_three_staple;
  acoef.five_staple = hisq_coeff->fat7_five_staple;
  acoef.seven_staple = hisq_coeff->fat7_seven_staple;
  acoef.lepage = 0;
  acoef.naik = 0;
  if(QOP_hisq_links.use_fat7_lepage) {
    acoef.lepage = hisq_coeff->fat7_lepage;
  }

  QOP_hisq_unitarize_method_t umethod = hisq_coeff->umethod;
  if ( umethod==QOP_UNITARIZE_NONE ){

    for(int dir=0; dir<4; dir++)
      QDP_M_eq_zero(force_accum_1[dir], QDP_all);
    QOP_asqtad_deriv(&tinfo, Ugf, force_accum_1, &acoef,
		     force_accum_2, NULL);
    totalflops += tinfo.final_flop;

  } else if ( umethod==QOP_UNITARIZE_RATIONAL ) {

    // the reunitarization derivative is applied to the adjoint of the
    // accumulated force, and its output is adjointed back afterwards
    for(int mu=0; mu<4; mu++) QDP_M_eq_Ma(force_accum_1u[mu], force_accum_2[mu], QDP_all);
    // reunitarization
#if QOP_Colors == 3
    QOP_hisq_force_multi_reunit(&tinfo, Vgf, force_accum_2, force_accum_1u);
#else
    for(int mu=0; mu<4; mu++) {
      QOP_projectU_deriv_qdp(&tinfo, force_accum_2[mu], Wgf[mu], Vgf[mu], force_accum_1u[mu]);
    }
#endif
    //QOP_printf0("reunit flops = %g\n", tinfo.final_flop);
    for(int mu=0; mu<4; mu++) QDP_M_eq_Ma(force_accum_1u[mu], force_accum_2[mu], QDP_all);
    totalflops += tinfo.final_flop;

    for(int dir=0; dir<4; dir++) QDP_M_eq_zero(force_accum_1[dir], QDP_all);
    QOP_asqtad_deriv(&tinfo, Ugf, force_accum_1, &acoef,
		     force_accum_1u, NULL);
    //QOP_printf0("HISQ smear1 flops = %g\n", tinfo.final_flop);
    totalflops += tinfo.final_flop;

  } else {
    QOP_printf0("Unknown or unsupported unitarization method\n");
    exit(1);
  }

  // take into account even/odd parity (it is NOT done in "smearing" routine)
  // eps multiplication done outside QOP 
  // extra factor of 2
  // (the odd-site sign was already applied to the outer products above, so
  // a single update on all sites is enough here)
  for(int dir=0; dir<4; dir++) {
    QLA_Real treal = 2;
    QDP_M_peq_r_times_M(deriv[dir], &treal, force_accum_1[dir], QDP_all);
  }
  siteflops += 4*36;

  for(int i=0; i<4; i++) {
     QDP_destroy_M( force_accum_0[i] );
     QDP_destroy_M( force_accum_0_naik[i] );
     QDP_destroy_M( force_accum_1[i] );
     QDP_destroy_M( force_accum_1u[i] );
     QDP_destroy_M( force_accum_2[i] );
  }

  totalflops += ((double)siteflops)*QDP_sites_on_node;
  info->final_sec = QDP_time() - dtime;
  info->final_flop = totalflops;
  info->status = QOP_SUCCESS;
#undef NC
}
コード例 #8
0
/*
 * Update the gauge links in place using the staple computed by
 * QOP_symanzik_1loop_gauge_staple_qdp.
 *
 *   info    receives final_sec, final_flop and status on return
 *   links   gauge links, one per lattice direction; overwritten
 *   beta    coupling; stored (divided by QLA_Nc) in `fac`
 *   coeffs  gauge action coefficients; a nonzero adjoint_plaquette is
 *           reported through QOP_error
 *   rs0     random state field, exposed for the duration of the update
 *   nup     number of outer iterations
 *   nhb     sweeps per outer iteration that apply hb_func
 *   nover   sweeps per outer iteration that apply over_func
 *
 * `fac` and `rs` are not declared in this function; presumably they are
 * file-scope variables read by hb_func/over_func -- confirm in the file.
 */
void
QOP_symanzik_1loop_gauge_heatbath_qdp(QOP_info_t *info,
                                      QDP_ColorMatrix *links[],
                                      QLA_Real beta,
                                      QOP_gauge_coeffs_t *coeffs,
                                      QDP_RandomState *rs0,
                                      int nup, int nhb, int nover)
{
#define NC QDP_get_nc(links[0])
  double dtime = QOP_time();
  // never accumulated below, so final_flop is always reported as 0
  double nflops = 0;

  if(coeffs->adjoint_plaquette) {
    QOP_error("%s: adj plaq not supported\n", __func__);
  }
  fac = beta/QLA_Nc;

  QDP_Lattice *lat = QDP_get_lattice_M(links[0]);
  int nd = QDP_ndim_L(lat);

  // two subsets (even/odd) by default; 32 subsets from QOP_get_sub32 when
  // the action has rectangle or parallelogram terms
  QDP_Subset *cbs = QDP_even_and_odd_L(lat);
  int ncb = 2;
  if((coeffs->rectangle!=0) || (coeffs->parallelogram!=0)) {
    cbs = QOP_get_sub32(lat);
    ncb = 32;
  }

  QDP_ColorMatrix *stap = QDP_create_M_L(lat);
  QDP_ColorMatrix *upd = QDP_create_M_L(lat);
  QDP_ColorMatrix *newlink = QDP_create_M_L(lat);
  rs = QDP_expose_S(rs0);

  for(int up=0; up<nup; up++) {
    // phase 0 runs the nhb sweeps with hb_func,
    // phase 1 runs the nover sweeps with over_func
    for(int phase=0; phase<2; phase++) {
      int nsweep = (phase==0) ? nhb : nover;
      for(int sweep=0; sweep<nsweep; sweep++) {
        for(int cb=0; cb<ncb; cb++) {
          QDP_Subset sub = cbs[cb];
          for(int mu=0; mu<nd; mu++) {
            QDP_M_eq_zero(stap, sub);
            QOP_symanzik_1loop_gauge_staple_qdp(info, links, stap, mu, coeffs, cbs, cb);
            // upd = links[mu] * (staple adjoint), then transformed per site
            QDP_M_eq_M_times_Ma(upd, links[mu], stap, sub);
            if(phase==0) {
              QDP_M_eq_funcit(upd, hb_func, sub);
            } else {
              QDP_M_eq_funcit(upd, over_func, sub);
            }
            // left-multiply the old link by the transformed matrix
            QDP_M_eq_M_times_M(newlink, upd, links[mu], sub);
            QDP_M_eq_M(links[mu], newlink, sub);
          }
        }
      }
    }
  }

  QDP_reset_S(rs0);
  QDP_destroy_M(newlink);
  QDP_destroy_M(upd);
  QDP_destroy_M(stap);

  info->final_sec = QOP_time() - dtime;
  info->final_flop = nflops*QDP_sites_on_node;
  info->status = QOP_SUCCESS;
#undef NC
}
コード例 #9
0
/*
 * Benchmark driver.
 *
 * Creates ndim gauge links and fills them via get_random_links, prints the
 * plaquette, initializes QOP with the lattice layout, then times
 * bench_action once and bench_force once per block size in bsa[0..bsn-1].
 * The force benchmark is then repeated under QDP profiling with the block
 * size that gave the highest mflops, and QOP is finalized.
 *
 * `secs` is printed but not set here; presumably it is a file-scope value
 * written by bench_action/bench_force -- confirm in the file.
 */
void
start(void)
{
  double mf, best_mf;
  QLA_Real plaq;
  QDP_ColorMatrix **u;
  int i, bs, bsi, best_bs;

  u = malloc(ndim*sizeof(*u));
  if(u==NULL) {
    fprintf(stderr, "start: out of memory allocating link array\n");
    exit(1);
  }
  for(i=0; i<ndim; i++) u[i] = QDP_create_M();
  get_random_links(u, ndim, 0.3);

  plaq = get_plaq(u);
  if(QDP_this_node==0) printf("plaquette = %g\n", plaq);
  
  QOP_layout_t qoplayout = QOP_LAYOUT_ZERO;
  qoplayout.latdim = ndim;
  qoplayout.latsize = malloc(ndim*sizeof(*qoplayout.latsize));
  if(qoplayout.latsize==NULL) {
    fprintf(stderr, "start: out of memory allocating lattice size array\n");
    exit(1);
  }
  for(i=0; i<ndim; i++) {
    qoplayout.latsize[i] = lattice_size[i];
  }
  qoplayout.machdim = -1;

  if(QDP_this_node==0) { printf("begin init\n"); fflush(stdout); }
  QOP_init(&qoplayout);

  gauge = QOP_create_G_from_qdp(u);

  QOP_Force *force;

  // Zeroed matrices used as the initial value of every force object.
  // NOTE(review): the array is sized 4 regardless of ndim -- confirm that
  // ndim is always 4 for this benchmark.
  QDP_ColorMatrix *cm[4];
  for(i=0; i<4; i++) {
    cm[i] = QDP_create_M();
    QDP_M_eq_zero(cm[i], QDP_all);
  }

  QOP_gauge_coeffs_t gcoeffs = QOP_GAUGE_COEFFS_ZERO;
  gcoeffs.plaquette  = 0.2;
  gcoeffs.rectangle  = 0.2;
  gcoeffs.parallelogram   = 0.2;
  gcoeffs.adjoint_plaquette = 0.2;

  force = QOP_create_F_from_qdp(cm);
  mf = bench_action(&gcoeffs, force);
  QOP_destroy_F(force);
  printf0("action: sec%7.4f mflops = %g\n", secs, mf);

  if(QDP_this_node==0) { printf("begin force\n"); fflush(stdout); }

  // scan the candidate block sizes and remember the fastest one
  best_mf = 0;
  best_bs = bsa[0];
  for(bsi=0; bsi<bsn; bsi++) {
    bs = bsa[bsi];
    QDP_set_block_size(bs);
    force = QOP_create_F_from_qdp(cm);
    mf = bench_force(&gcoeffs, force);
    QOP_destroy_F(force);
    printf0("GF: bs%5i sec%7.4f mflops = %g\n", bs, secs, mf);
    if(mf>best_mf) {
      best_mf = mf;
      best_bs = bs;
    }
  }

  // repeat the best configuration with profiling switched on
  QDP_set_block_size(best_bs);
  QDP_profcontrol(1);
  force = QOP_create_F_from_qdp(cm);
  mf = bench_force(&gcoeffs, force);
  QDP_profcontrol(0);
  // release the profiled force like every other one created above
  QOP_destroy_F(force);
  printf0("prof: GF: bs%5i sec%7.4f mflops = %g\n", best_bs, secs, mf);

  printf0("best: GF: bs%5i mflops = %g\n", best_bs, best_mf);

  // cm[] was only a template: each force above was created from it and
  // destroyed while cm[] stayed in use, so it is released separately here.
  for(i=0; i<4; i++) {
    QDP_destroy_M(cm[i]);
  }
  // NOTE(review): u[], gauge and qoplayout.latsize are not released here;
  // whether QOP keeps references to them is not visible from this function.

  if(QDP_this_node==0) { printf("begin unload links\n"); fflush(stdout); }
  //QOP_asqtad_invert_unload_links();
  if(QDP_this_node==0) { printf("begin finalize\n"); fflush(stdout); }
  QOP_finalize();
}