コード例 #1
0
/*
 * Gather a connection matrix from a neighboring site WITHOUT multiplying
 * by link matrices (compare link_transport).  Useful, for example, when an
 * intermediate HISQ force (a connection) is stored at the lattice site
 * associated with a link.
 *
 *   dest - receives the shifted field (also used as scratch for 3-link shifts)
 *   src  - field to shift
 *   work - scratch field, overwritten
 *   dir  - direction code; dir >= 8 requests a three-link shift along
 *          direction (dir - 8), otherwise a single-link shift along dir
 */
static void 
link_gather_connection_qdp( QDP_ColorMatrix *dest, 
			    QDP_ColorMatrix *src,
			    QDP_ColorMatrix *work,
			    int dir ){
  int three_links = (dir >= 8);

  if (three_links) dir -= 8;

  if( GOES_FORWARDS(dir) ) {
    if (three_links) {
      /* first two of the three hops: src -> dest -> work */
      QDP_M_eq_sM(dest, src, QDP_neighbor[dir], QDP_forward, QDP_all);
      QDP_M_eq_sM(work, dest, QDP_neighbor[dir], QDP_forward, QDP_all);
    } else {
      /* single hop: stage the source in the work field */
      QDP_M_eq_M(work, src, QDP_all);
    }
    /* final hop: work -> dest */
    QDP_M_eq_sM(dest, work, QDP_neighbor[dir], QDP_forward, QDP_all);
    return;
  }

  /* GOES_BACKWARDS(dir): same sequence, shifting backward along OPP_DIR(dir) */
  if (three_links) {
    QDP_M_eq_sM(dest, src, QDP_neighbor[OPP_DIR(dir)],
		QDP_backward, QDP_all);
    QDP_M_eq_sM(work, dest, QDP_neighbor[OPP_DIR(dir)],
		QDP_backward, QDP_all);
  } else {
    QDP_M_eq_M(work, src, QDP_all);
  }
  QDP_M_eq_sM(dest, work, QDP_neighbor[OPP_DIR(dir)], QDP_backward, QDP_all);
} /* link_gather_connection_qdp */
コード例 #2
0
/*
 * Accumulate the outer products back_qdp[i] * (c[i] * forw_qdp[i])^dagger
 * into the temporary momentum tempmom_qdp[] for the given direction.
 *
 *   back_qdp, forw_qdp - arrays of nsrc color vectors
 *   dir                - direction; a backward direction is mapped to its
 *                        opposite and the coefficients are negated
 *   coeff              - nsrc coefficients (not modified)
 *   nsrc               - number of sources
 *
 * Uses tv[] and tempmom_qdp[], which are declared outside this function.
 */
static void
add_forces_to_mom(QDP_ColorVector **back_qdp, QDP_ColorVector **forw_qdp, 
		  int dir, REAL coeff[], int nsrc)
{
  REAL scaled[nsrc];
  QDP_ColorMatrix *mom[nsrc];
  int backwards;
  int k;

  QOP_trace("test 481\n");
  backwards = GOES_BACKWARDS(dir);
  if(backwards) {
    dir = OPP_DIR(dir);
  }
  /* signed copy of the coefficients: negated for backward directions */
  for(k=0; k<nsrc; k++) {
    scaled[k] = backwards ? -coeff[k] : coeff[k];
  }
  QOP_trace("test 482\n");

  for(k=0; k<nsrc; k++) {
    /* every source accumulates into the same momentum matrix */
    mom[k] = tempmom_qdp[dir];
    QDP_V_eq_r_times_V(tv[k], &scaled[k], forw_qdp[k], QDP_all);
  }
  QOP_trace("test 483\n");
  QDP_M_vpeq_V_times_Va(mom, back_qdp, tv, QDP_all, nsrc);
  QOP_trace("test 484\n");
}
コード例 #3
0
ファイル: com_vanilla.c プロジェクト: daschaich/susy
// Compute the coordinates of the nearest neighbor of site (x, y, z, t)
// Used by make_gather for nearest neighbor gathers
//   dirpt              Pointer to the direction (eg XUP)
//   fb                 FORWARDS uses *dirpt as is, anything else its opposite
//   x2p, y2p, z2p, t2p Receive the neighbor's coordinates,
//                      wrapped periodically by nx, ny, nz, nt
// An unrecognized direction prints a message and calls terminate(1)
static void neighbor_coords_special(
  int x, int y, int z, int t,
  int *dirpt,
  int fb,
  int *x2p, int *y2p, int *z2p, int *t2p)
{
  int dir = *dirpt;

  if (fb != FORWARDS)
    dir = OPP_DIR(dir);

  // Start from the site itself; exactly one coordinate changes below
  *x2p = x;
  *y2p = y;
  *z2p = z;
  *t2p = t;

  if (dir == XUP)
    *x2p = (x + 1) % nx;
  else if (dir == XDOWN)
    *x2p = (x + nx - 1) % nx;
  else if (dir == YUP)
    *y2p = (y + 1) % ny;
  else if (dir == YDOWN)
    *y2p = (y + ny - 1) % ny;
  else if (dir == ZUP)
    *z2p = (z + 1) % nz;
  else if (dir == ZDOWN)
    *z2p = (z + nz - 1) % nz;
  else if (dir == TUP)
    *t2p = (t + 1) % nt;
  else if (dir == TDOWN)
    *t2p = (t + nt - 1) % nt;
  else {
    printf("BOTCH: bad direction\n");
    terminate(1);
  }
}
コード例 #4
0
ファイル: fuz_prop.c プロジェクト: erinaldi/milc_qcd
void fuz_prop(field_offset fprop, int r0)
{
register int i;
register site *s; 

int  dir, k;

msg_tag *tag0, *tag1;

    /* Save unfuzzed propagator in xxx */
    copy_latvec( fprop, F_OFFSET(xxx), EVENANDODD);
    /* Give central value weight two */
    scalar_mult_latvec( F_OFFSET(xxx), 2.0, fprop, EVENANDODD);

    if (r0 > 0){

	for(dir=XUP;dir<=ZUP;dir++){
	    /* Start gathering for 'backward' link-product */
	    tag0 = start_gather_site(F_OFFSET(xxx), sizeof(su3_vector),
		dir, EVENANDODD, gen_pt[0]);

	    /* Start 'forward' link-product */
	    FORALLSITES(i,s) {
		mult_adj_su3_mat_vec(&(s->link[dir]), &(s->xxx), &(s->ttt));
	    }
	    tag1 = start_gather_site(F_OFFSET(ttt), sizeof(su3_vector),
		OPP_DIR(dir), EVENANDODD, gen_pt[1]);

	    for(k=1;k<r0;k++) {
		wait_gather(tag0);
/*
		copy_latvec( (field_offset)gen_pt[0], F_OFFSET(resid),
		    EVENANDODD);
*/
		FORALLSITES(i,s) {
		    su3vec_copy((su3_vector *)gen_pt[0][i], &(s->resid));
		}
		FORALLSITES(i,s) {
		    mult_su3_mat_vec(&(s->link[dir]), &(s->resid), &(s->cg_p));
		}
		if(k==1) {
		    cleanup_gather(tag0);
		    tag0 = start_gather_site(F_OFFSET(cg_p), sizeof(su3_vector),
			dir, EVENANDODD, gen_pt[0]);
		}
		else {
		    restart_gather_site(F_OFFSET(cg_p), sizeof(su3_vector),
			dir, EVENANDODD, gen_pt[0], tag0);
		}

		wait_gather(tag1);
/*
		copy_latvec( (field_offset)gen_pt[1], F_OFFSET(resid),
		    EVENANDODD);
*/
		FORALLSITES(i,s) {
		    su3vec_copy((su3_vector *)gen_pt[1][i], &(s->resid));
		}
コード例 #5
0
ファイル: dslash_fn2.c プロジェクト: lattice/milc
/* Release the gather tags of two tag sets: for each set, the one-link
   directions XUP..TUP with their opposites, then the three-link directions
   X3UP..T3UP with their opposites. */
void cleanup_gathers(msg_tag *tags1[], msg_tag *tags2[])
{
  int dir;

  /* one-link gathers */
  dir = XUP;
  while(dir <= TUP){
    cleanup_gather( tags1[dir] );
    cleanup_gather( tags1[OPP_DIR(dir)] );
    cleanup_gather( tags2[dir] );
    cleanup_gather( tags2[OPP_DIR(dir)] );
    dir++;
  }

  /* three-link gathers */
  dir = X3UP;
  while(dir <= T3UP){
    cleanup_gather( tags1[dir] );
    cleanup_gather( tags1[OPP_3_DIR(dir)] );
    cleanup_gather( tags2[dir] );
    cleanup_gather( tags2[OPP_3_DIR(dir)] );
    dir++;
  }
}
コード例 #6
0
/*
 * Transport a "connection" by one link; acts on both parities (QDP_all).
 *
 *   dest - result
 *   src  - connection to transport
 *   gf   - gauge links, indexed by forward direction
 *   work - scratch matrix, overwritten
 *   st   - per-direction shift temporaries; st[dir] is used and discarded
 *   dir  - direction of transport
 *
 * Forward:  shift src, then multiply by gf[dir] on the left.
 * Backward: multiply src by the adjoint of gf[OPP_DIR(dir)] first, then shift.
 */
static void 
link_transport_connection_qdp( QDP_ColorMatrix *dest, QDP_ColorMatrix *src,
			       QDP_ColorMatrix *gf[4], QDP_ColorMatrix *work,
                               QDP_ColorMatrix *st[8], int dir ){
  QDP_ColorMatrix *shifted = st[dir];

  if( GOES_FORWARDS(dir) ) {
    QDP_M_eq_M(work, src, QDP_all);
    QDP_M_eq_sM(shifted, work, QDP_neighbor[dir], QDP_forward, QDP_all);
    QDP_M_eq_M_times_M(dest, gf[dir], shifted, QDP_all);
    QDP_discard_M(shifted);
    return;
  }

  /* GOES_BACKWARDS(dir) */
  QDP_M_eq_Ma_times_M(work, gf[OPP_DIR(dir)], src, QDP_all);
  QDP_M_eq_sM(shifted, work, QDP_neighbor[OPP_DIR(dir)],
	      QDP_backward, QDP_all);
  QDP_M_eq_M(dest, shifted, QDP_all);
  QDP_discard_M(shifted);
} /* link_transport_connection_qdp */
コード例 #7
0
ファイル: update_h.c プロジェクト: erinaldi/milc_qcd
/* update the momenta with the gauge force */
void gauge_force(Real eps) {
register int i,dir1,dir2;
register site *st;
msg_tag *tag0,*tag1,*tag2;
int start;
su3_matrix tmat1,tmat2;
register Real eb3;

/**double dtime,dclock();
dtime = -dclock();**/

    eb3 = eps*beta/3.0;
    /* Loop over directions, update mom[dir1] */
    for(dir1=XUP; dir1<=TUP; dir1++){
	/* Loop over other directions, computing force from plaquettes in
	   the dir1,dir2 plane */
	start=1; /* indicates staple sum not initialized */
	for(dir2=XUP;dir2<=TUP;dir2++)if(dir2 != dir1){

	    /* get link[dir2] from direction dir1 */
	    tag0 = start_gather_site( F_OFFSET(link[dir2]), sizeof(su3_matrix),
		dir1, EVENANDODD, gen_pt[0] );

	    /* Start gather for the "upper staple" */
	    tag2 = start_gather_site( F_OFFSET(link[dir1]), sizeof(su3_matrix),
		dir2, EVENANDODD, gen_pt[2] );

	    /* begin the computation "at the dir2DOWN point", we will
		later gather the intermediate result "to the home point" */

	    wait_gather(tag0);
	    FORALLSITES(i,st){
	        mult_su3_an( &(st->link[dir2]), &(st->link[dir1]), &tmat1 );
	        mult_su3_nn( &tmat1, (su3_matrix *)gen_pt[0][i],
		    &(st->tempmat1) );
	    }

	    /* Gather this partial result "up to home site" */
	    tag1 = start_gather_site( F_OFFSET(tempmat1), sizeof(su3_matrix),
		OPP_DIR(dir2), EVENANDODD, gen_pt[1] );

	    /* begin the computation of the "upper" staple.  Note that
		one of the links has already been gathered, since it
		was used in computing the "lower" staple of the site
		above us (in dir2) */
	    wait_gather(tag2);
	    if(start){	/* this is the first contribution to staple */
	        FORALLSITES(i,st){
		    mult_su3_nn( &(st->link[dir2]), (su3_matrix *)gen_pt[2][i],
		        &tmat1);
		    mult_su3_na( &tmat1, (su3_matrix *)gen_pt[0][i],
			&(st->staple) );
		}
		start=0;
	    }
コード例 #8
0
ファイル: com_vanilla.c プロジェクト: rgjha/susy
// -----------------------------------------------------------------
// Functions used for gathers
// Reorder four consecutive gather_t entries of gather_array, beginning
// at the given index, into the order we want:
// XUP, TUP, TDOWN, XDOWN
// On entry the pair for up-direction i sits at offsets 2i (up), 2i+1 (down)
void sort_four_gathers(int index) {
  gather_t saved[4];
  gather_t *base = &gather_array[index];
  int dir;

  // Snapshot the original four entries in one go
  memcpy(saved, base, sizeof(saved));

  for (dir = XUP; dir <= TUP; dir++) {
    memcpy(base + dir, &saved[2 * dir], sizeof(gather_t));
    memcpy(base + OPP_DIR(dir), &saved[2 * dir + 1], sizeof(gather_t));
  }
}
コード例 #9
0
ファイル: com_vanilla.c プロジェクト: daschaich/susy
// -----------------------------------------------------------------
// Functions used for gathers
// Reorder eight consecutive gather_t entries of gather_array, beginning
// at the given index, into the order we want:
// XUP, YUP, ZUP, TUP, TDOWN, ZDOWN, YDOWN, XDOWN
// On entry the pair for up-direction i sits at offsets 2i (up), 2i+1 (down)
void sort_eight_gathers(int index) {
  gather_t saved[8];
  gather_t *base = &gather_array[index];
  int dir;

  // Snapshot the original eight entries in one go
  memcpy(saved, base, sizeof(saved));

  FORALLUPDIR(dir) {
    memcpy(base + dir, &saved[2 * dir], sizeof(gather_t));
    memcpy(base + OPP_DIR(dir), &saved[2 * dir + 1], sizeof(gather_t));
  }
}
コード例 #10
0
ファイル: gauss_smear_ks.c プロジェクト: lattice/milc
/*------------------------------------------------------------*/
static void 
malloc_kg_temps(){
  int dir;
  
  for(dir=0;dir<8;dir++)wtmp[dir] = NULL;

  FORALLUPDIRBUT(TUP,dir){
    wtmp[dir] =(su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
    if(wtmp[dir] == NULL){
      printf("node %d can't malloc wtmp[%d]\n",this_node,dir);
      terminate(1);
    }
    memset(wtmp[dir],'\0',sites_on_node*sizeof(su3_vector));
    
    wtmp[OPP_DIR(dir)] =(su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
    if(wtmp[OPP_DIR(dir)] == NULL){
      printf("node %d can't malloc wtmp[%d]\n",this_node,OPP_DIR(dir));
      terminate(1);
    }
    memset(wtmp[OPP_DIR(dir)],'\0',sites_on_node*sizeof(su3_vector));
  }
コード例 #11
0
/*
 * Find the gather direction that undoes the net displacement of a path.
 *
 * The total displacement of `path` is accumulated per axis.  Only paths
 * whose net displacement is exactly one link or exactly three links along
 * a single axis are supported:
 *   +1 along X -> XDOWN,   -1 along X -> XUP,
 *   +3 along X -> X3DOWN,  -3 along X -> X3UP,   (likewise for Y, Z, T).
 *
 * Any other displacement is a fatal error: a message is printed and the
 * program exits with a non-zero status so the failure is visible to the
 * caller's shell / job scheduler.
 */
static int 
find_backwards_gather( Q_path *path ){
  /* lookup tables indexed by the axis (XUP..TUP) carrying the displacement */
  const int back1[4] = { [XUP]=XDOWN,  [YUP]=YDOWN,  [ZUP]=ZDOWN,  [TUP]=TDOWN  };
  const int back3[4] = { [XUP]=X3DOWN, [YUP]=Y3DOWN, [ZUP]=Z3DOWN, [TUP]=T3DOWN };
  const int forw3[4] = { [XUP]=X3UP,   [YUP]=Y3UP,   [ZUP]=Z3UP,   [TUP]=T3UP   };
  int disp[4], i;
  int axis = XUP, nonzero = 0;

  /* compute total displacement of path */
  for(i=XUP;i<=TUP;i++)disp[i]=0;
  for( i=0; i<path->length; i++){
    if( GOES_FORWARDS(path->dir[i]) )
      disp[        path->dir[i]  ]++;
    else
      disp[OPP_DIR(path->dir[i]) ]--;
  }

  /* count the axes with a net displacement and remember the last one */
  for(i=XUP;i<=TUP;i++){
    if( disp[i]!=0 ){
      axis = i;
      nonzero++;
    }
  }

  /* a valid path moves along exactly one axis, by one or three links */
  if( nonzero==1 ){
    switch( disp[axis] ){
    case +1: return( back1[axis] );
    case -1: return( axis );          /* the forward one-link direction */
    case +3: return( back3[axis] );
    case -3: return( forw3[axis] );
    default: break;
    }
  }

  /* unsupported displacement: report failure with a non-zero exit status */
  QOP_printf0("OOOPS: NODIR\n"); exit(1);
  return( NODIR );  /* not reached; keeps compilers quiet */
} //find_backwards_gather
コード例 #12
0
ファイル: dslash_fn.c プロジェクト: erinaldi/milc_qcd
/* Release every gather tag in one tag set: the one-link directions
   XUP..TUP with their opposites, then the three-link directions
   X3UP..T3UP with their opposites. */
static void 
cleanup_one_gather_set(msg_tag *tags[])
{
  int dir;

  /* one-link gathers */
  dir = XUP;
  while(dir <= TUP){
    cleanup_gather( tags[dir] );
    cleanup_gather( tags[OPP_DIR(dir)] );
    dir++;
  }

  /* three-link gathers */
  dir = X3UP;
  while(dir <= T3UP){
    cleanup_gather( tags[dir] );
    cleanup_gather( tags[OPP_3_DIR(dir)] );
    dir++;
  }
}
コード例 #13
0
ファイル: path.c プロジェクト: erinaldi/milc_qcd
void path(int *dir,int *sign,int length)
{
register int i;
register site *s;
msg_tag *mtag0, *mtag1;
int j;


/* j=0 */
	if(sign[0]>0)  {
	    mtag0 = start_gather_site( F_OFFSET(link[dir[0]]), sizeof(su3_matrix),
		OPP_DIR(dir[0]), EVENANDODD, gen_pt[0] );
	    wait_gather(mtag0);

	      FORALLSITES(i,s){
	      su3mat_copy((su3_matrix *)(gen_pt[0][i]),&(s->tempmat1) );
	      }
コード例 #14
0
ファイル: dslash_fn2.c プロジェクト: lattice/milc
/* Site-major dslash wrapper: calls dslash_fn_site_special with a local tag
   array and a literal 1 as the fifth argument, then cleans up all sixteen
   gather tags (one-link and three-link, both orientations). */
void dslash_fn_site( field_offset src, field_offset dest, int parity,
		     fn_links_t *fn )
{
   msg_tag *tag[16];
   int d;

   dslash_fn_site_special(src, dest, parity, tag, 1, fn );

   /* free up the buffers: one-link gathers first ... */
   d = XUP;
   while(d <= TUP){
     cleanup_gather(tag[d]);
     cleanup_gather(tag[OPP_DIR(d)]);
     d++;
   }
   /* ... then the three-link gathers */
   d = X3UP;
   while(d <= T3UP){
     cleanup_gather(tag[d]);
     cleanup_gather(tag[OPP_3_DIR(d)]);
     d++;
   }
} /* end dslash_fn_site */
コード例 #15
0
ファイル: dslash_fn2.c プロジェクト: lattice/milc
/* Field-major dslash wrapper: calls dslash_fn_field_special with a local tag
   array and a literal 1 as the fifth argument, then cleans up all sixteen
   gather tags (one-link and three-link, both orientations). */
void dslash_fn_field( su3_vector *src, su3_vector *dest, int parity,
		      fn_links_t *fn) {
   msg_tag *tag[16];
   int d;

   dslash_fn_field_special(src, dest, parity, tag, 1, fn);

   /* free up the buffers: one-link gathers first ... */
   d = XUP;
   while(d <= TUP){
     cleanup_gather(tag[d]);
     cleanup_gather(tag[OPP_DIR(d)]);
     d++;
   }

   /* ... then the three-link gathers */
   d = X3UP;
   while(d <= T3UP){
     cleanup_gather(tag[d]);
     cleanup_gather(tag[OPP_3_DIR(d)]);
     d++;
   }
}
コード例 #16
0
ファイル: setup.c プロジェクト: liu0604/milc_qcd
/*
 * Coordinates of the third neighbor (three sites away) of a lattice site.
 *
 *   x, y, z, t      coordinates of the site
 *   dirpt           pointer to the direction (eg XUP)
 *   FB              "forwards/backwards": FORWARDS uses *dirpt as is,
 *                   anything else uses the opposite direction
 *   xp, yp, zp, tp  pointers to the coordinates of the neighbor, wrapped
 *                   periodically by nx, ny, nz, nt
 *
 * An unrecognized direction prints a message and exits with status 1.
 */
void third_neighbor(int x,int y,int z,int t,int *dirpt,int FB,int *xp,int *yp,int *zp,int *tp)
{
   int d;

   if(FB==FORWARDS)
     d = *dirpt;
   else
     d = OPP_DIR(*dirpt);

   /* start from the site itself; exactly one coordinate changes below */
   *xp = x;
   *yp = y;
   *zp = z;
   *tp = t;

   switch(d){
     case XUP:
       *xp = (x+3)%nx;
       break;
     case XDOWN:
       *xp = (x+4*nx-3)%nx;
       break;
     case YUP:
       *yp = (y+3)%ny;
       break;
     case YDOWN:
       *yp = (y+4*ny-3)%ny;
       break;
     case ZUP:
       *zp = (z+3)%nz;
       break;
     case ZDOWN:
       *zp = (z+4*nz-3)%nz;
       break;
     case TUP:
       *tp = (t+3)%nt;
       break;
     case TDOWN:
       *tp = (t+4*nt-3)%nt;
       break;
     default:
       printf("third_neighb: bad direction\n");
       exit(1);
   }
}
コード例 #17
0
/*  Multi-source version of side_link_force, used   *
 * to optimize fermion transports.                  *
 *                                                  *
 * Dispatches to add_forces_to_mom with the vector  *
 * pair, direction and coefficient sign chosen by   *
 * the orientations of mu and nu.                   */
static void
side_link_forces(int mu, int nu, REAL coeff[], QDP_ColorVector **Path,
		 QDP_ColorVector **Path_nu, QDP_ColorVector **Path_mu,
		 QDP_ColorVector **Path_numu, int nsrc)
{
  REAL neg_coeff[nsrc];
  int k;

  /* negated copy of the coefficients */
  for(k=0; k<nsrc; k++) {
    neg_coeff[k] = -coeff[k];
  }

  if(GOES_FORWARDS(mu))
    {
      /*                    nu           * 
       * Add the force :  +----+         *
       *               mu |    |         *
       *                  x    (x)       *
       *                  o    o         */
      /* same vectors and direction either way; only the sign depends on nu */
      add_forces_to_mom(Path_numu, Path, mu,
			GOES_FORWARDS(nu) ? coeff : neg_coeff, nsrc);
      return;
    }

  /*GOES_BACKWARDS(mu)*/
  /* Add the force :  o    o         *
   *               mu |    |         *
   *                  x    (x)       *
   *                  +----+         *
   *                    nu           */ 
  if(GOES_FORWARDS(nu)) {
    add_forces_to_mom(Path_nu, Path_mu, mu, neg_coeff, nsrc);
  } else {
    add_forces_to_mom(Path_mu, Path_nu, OPP_DIR(mu), coeff, nsrc);
  }
}
コード例 #18
0
/*
 * Asqtad fermion force for nsrc source vectors at once.
 *
 *   info  - on return, final_sec, final_flop and status are filled in
 *   links - gauge links for directions 0 .. QOP_common.ndim-1
 *   force - per-direction matrices the force is accumulated into
 *           (aliased as tempmom_qdp[mu] for mu = 0..3)
 *   coef  - asqtad path coefficients (one_link, naik, three_staple,
 *           five_staple, seven_staple, lepage)
 *   eps   - per-source weights; every path coefficient is multiplied by eps[i]
 *   xin   - the nsrc input color vectors
 *   nsrc  - number of sources
 *
 * The routine uses fbshift, fbshiftdir, fblink, tv and tempmom_qdp, which
 * are not declared inside this function.
 *
 * All temporaries created here with QDP_create_V / QDP_create_M are
 * destroyed before returning.
 *
 * NOTE(review): the helper macros P5nu, Pmumu, Pmumutmp, P5sig, P5sigtmp,
 * P3mu, Popmu and Pmumumu are #define'd inside the body and are not
 * #undef'd within this function (only NC is) - confirm that is intended.
 */
void
QOP_asqtad_force_multi_asvec_qdp(QOP_info_t *info, QDP_ColorMatrix *links[],
				 QDP_ColorMatrix *force[], QOP_asqtad_coeffs_t *coef,
				 REAL eps[], QDP_ColorVector *xin[], int nsrc)
{
#define NC QDP_get_nc(xin[0])
  REAL coeff[nsrc];
  REAL OneLink[nsrc], Lepage[nsrc], Naik[nsrc], FiveSt[nsrc], ThreeSt[nsrc], SevenSt[nsrc];
  REAL mNaik[nsrc], mLepage[nsrc], mFiveSt[nsrc], mThreeSt[nsrc], mSevenSt[nsrc];

  QDP_ColorVector *P3[8][nsrc];

  /* With the "#if 1" branches below, P5 and P5tmp hold no storage of their
     own: their entries are pointed at slots of P5s / P5tmps inside the nu
     loop before use. */
  QDP_ColorVector *P5[8][nsrc];
  QDP_ColorVector *P5tmp[8][8][nsrc];
  QDP_ColorVector *P5s[4][nsrc];
  QDP_ColorVector *P5tmps[4][8][nsrc];

  //QDP_ColorVector *xin[nsrc];
  QDP_ColorVector *xintmp[8][nsrc];
  QDP_ColorVector *Pmu[nsrc];
  QDP_ColorVector *Pmutmp[8][nsrc];
  QDP_ColorVector *Pnumu[nsrc];
  QDP_ColorVector *Pnumutmp[8][nsrc];
  QDP_ColorVector *Prhonumu[nsrc];
  QDP_ColorVector *Prhonumutmp[8][nsrc];
  QDP_ColorVector *P7[nsrc];
  QDP_ColorVector *P7tmp[8][nsrc];
  QDP_ColorVector *P7rho[nsrc];
  QDP_ColorVector *ttv[nsrc];

  int i, dir;
  int mu, nu, rho, sig;

  /* Flop count reported in info: nflop1 for the first source plus
     (nflop2-nflop1) for each additional one, scaled by QDP_sites_on_node
     at the end. */
  double nflop1 = 253935;
  double nflop2 = 433968;
  double nflop = nflop1 + (nflop2-nflop1)*(nsrc-1);
  double dtime;
  dtime = -QOP_time();

  ASQTAD_FORCE_BEGIN;

  QOP_trace("test 1\n");
  /* setup parallel transport */
  /* Forward entries reuse the caller's links; each backward entry
     fblink[OPP_DIR(i)] is a newly created matrix holding the adjoint of
     the link shifted with QDP_backward. */
  QDP_ColorMatrix *tmpmat = QDP_create_M();
  for(i=0; i<QOP_common.ndim; i++) {
    fbshift[i] = QDP_neighbor[i];
    fbshiftdir[i] = QDP_forward;
    fblink[i] = links[i];
    fbshift[OPP_DIR(i)] = QDP_neighbor[i];
    fbshiftdir[OPP_DIR(i)] = QDP_backward;
    fblink[OPP_DIR(i)] = QDP_create_M();
    QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all);
    QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all);
  }

  /* tv is declared outside this function; point it at local storage for
     the duration of this call (add_forces_to_mom writes through it). */
  tv = ttv;
  for(i=0; i<nsrc; i++) {
    tv[i] = QDP_create_V();
  }

  QOP_trace("test 2\n");
  /* Allocate temporary vectors */
  for(i=0; i<nsrc; i++) {
    Pmu[i] = QDP_create_V();
    Pnumu[i] = QDP_create_V();
    Prhonumu[i] = QDP_create_V();
    P7[i] = QDP_create_V();
    P7rho[i] = QDP_create_V();
    for(dir=0; dir<8; dir++) {
      xintmp[dir][i] = QDP_create_V();
      Pmutmp[dir][i] = QDP_create_V();
      Pnumutmp[dir][i] = QDP_create_V();
      Prhonumutmp[dir][i] = QDP_create_V();
      P7tmp[dir][i] = QDP_create_V();
    }
#if 1
    for(mu=0; mu<4; mu++) {
      P5s[mu][i] = QDP_create_V();
      for(dir=0; dir<8; dir++) {
	P5tmps[mu][dir][i] = QDP_create_V();
      }
    }
#else
    for(mu=0; mu<8; mu++) {
      P5[mu][i] = QDP_create_V();
      for(dir=0; dir<8; dir++) {
	P5tmp[mu][dir][i] = QDP_create_V();
	//printf("%p %p\n", P5tmp[mu][dir][i], &(P5tmp[mu][dir][i])); fflush(stdout);
	if(P5tmp[mu][dir][i]==NULL) {
	  fprintf(stderr, "error: can't create V\n");
	  QDP_abort();
	}
      }
    }
#endif
  }
  //printf("%p\n", P5tmp[0][4][0]); fflush(stdout);

  for(mu=0; mu<8; mu++) {
    for(i=0; i<nsrc; i++) {
      P3[mu][i] = QDP_create_V();
      //P5[mu][i] = QDP_create_V();
    }
  }

  /* Accumulate directly into the caller's force[] matrices.  The sign is
     flipped on the odd subset here and flipped again on the odd subset in
     the final loop, just before the antihermitian projection. */
  for(mu=0; mu<4; mu++) {
    tempmom_qdp[mu] = force[mu];
    QDP_M_eqm_M(tempmom_qdp[mu], tempmom_qdp[mu], QDP_odd);
  }

  /* Path coefficients times fermion epsilon */
  /* Load path coefficients from table */
  for(i=0; i<nsrc; i++) {
    OneLink[i] = coef->one_link     * eps[i];
    Naik[i]    = coef->naik         * eps[i]; mNaik[i]    = -Naik[i];
    ThreeSt[i] = coef->three_staple * eps[i]; mThreeSt[i] = -ThreeSt[i];
    FiveSt[i]  = coef->five_staple  * eps[i]; mFiveSt[i]  = -FiveSt[i];
    SevenSt[i] = coef->seven_staple * eps[i]; mSevenSt[i] = -SevenSt[i];
    Lepage[i]  = coef->lepage       * eps[i]; mLepage[i]  = -Lepage[i];
  }

#if 0
  printf("nsrc = %i\n", nsrc);
  printf("coeffs = %g %g %g %g %g %g\n", OneLink[0], ThreeSt[0], FiveSt[0],
	 SevenSt[0], Lepage[0], Naik[0]);
#endif

  /* *************************************** */

  QOP_trace("start force loop\n");
  for(mu=0; mu<8; mu++) {
    //u_shift_hw_fermion(temp_x_qdp, Pmu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)]);
    u_shift_color_vecs(xin, Pmu, OPP_DIR(mu), nsrc, xintmp[OPP_DIR(mu)]);

    for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) {
      //u_shift_hw_fermion(Pmu, P3[sig], sig, temp_hw[sig]);
      u_shift_color_vecs(Pmu, P3[sig], sig, nsrc, Pmutmp[sig]);

      if(GOES_FORWARDS(sig)) {
	/* Add the force F_sig[x+mu]:         x--+             *
	 *                                   |   |             *
	 *                                   o   o             *
	 * the 1 link in the path: - (numbering starts from 0) */
	add_forces_to_mom(P3[sig], Pmu, sig, mThreeSt, nsrc);
      }
    }

    for(nu=0; nu<8; nu++) if( (nu!=mu)&&(nu!=OPP_DIR(mu)) ) {
      /* nP5 counts the sig values accepted below; it selects which
         P5s / P5tmps slot P5[sig] / P5tmp[sig] alias for this (mu,nu). */
      int nP5 = 0;
      //Pnumu = hw_qdp[OPP_DIR(nu)];
      //u_shift_hw_fermion(Pmu, Pnumu, OPP_DIR(nu), temp_hw[OPP_DIR(nu)]);
      u_shift_color_vecs(Pmu, Pnumu, OPP_DIR(nu), nsrc, Pmutmp[OPP_DIR(nu)]);
      //QDP_V_veq_V(Pnumu, P3[OPP_DIR(nu)], QDP_all, nsrc);
      for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) &&
				   (sig!=nu)&&(sig!=OPP_DIR(nu)) ) {
#if 1
	for(i=0; i<nsrc; i++) {
	  P5[sig][i] = P5s[nP5][i];
	  for(dir=0; dir<8; dir++) P5tmp[sig][dir][i] = P5tmps[nP5][dir][i];
	}
#endif
	nP5++;
	//u_shift_hw_fermion(Pnumu, P5[sig], sig, temp_hw[sig]);
	u_shift_color_vecs(Pnumu, P5[sig], sig, nsrc, Pnumutmp[sig]);

	if(GOES_FORWARDS(sig)) {
	  /* Add the force F_sig[x+mu+nu]:      x--+             *
	   *                                   |   |             *
	   *                                   o   o             *
	   * the 2 link in the path: + (numbering starts from 0) */
	  add_forces_to_mom(P5[sig], Pnumu, sig, FiveSt, nsrc);
	}
      }
      QOP_trace("test 4\n");
      for(rho=0; rho<8; rho++) if( (rho!=mu)&&(rho!=OPP_DIR(mu)) &&
				   (rho!=nu)&&(rho!=OPP_DIR(nu)) ) {
	//Prhonumu = hw_qdp[OPP_DIR(rho)];
	//u_shift_hw_fermion(Pnumu, Prhonumu, OPP_DIR(rho), 
	//		 temp_hw[OPP_DIR(rho)] );
	  u_shift_color_vecs(Pnumu, Prhonumu, OPP_DIR(rho), nsrc,
			     Pnumutmp[OPP_DIR(rho)]);
	  //QDP_V_veq_V(Prhonumu, P5[OPP_DIR(rho)], QDP_all, nsrc);
	for(sig=0; sig<8; sig++) if( (sig!=mu )&&(sig!=OPP_DIR(mu )) &&
				     (sig!=nu )&&(sig!=OPP_DIR(nu )) &&
				     (sig!=rho)&&(sig!=OPP_DIR(rho)) ) {
	  /* Length 7 paths */
	  //P7 = hw_qdp[sig];
	  //u_shift_hw_fermion(Prhonumu, P7, sig, temp_hw[sig] );
  QOP_trace("test 43\n");
	  u_shift_color_vecs(Prhonumu, P7, sig, nsrc, Prhonumutmp[sig]);
  QOP_trace("test 44\n");
	  //QDP_V_eq_r_times_V(P7[0], &SevenSt[0], P7[0], QDP_all);
	  //QDP_V_eq_r_times_V(P7[1], &SevenSt[1], P7[1], QDP_all);
	  if(GOES_FORWARDS(sig)) {
	    /* Add the force F_sig[x+mu+nu+rho]:  x--+             *
	     *                                   |   |             *
	     *                                   o   o             *
	     * the 3 link in the path: - (numbering starts from 0) */
  QOP_trace("test 45\n");
	    add_forces_to_mom(P7, Prhonumu, sig, mSevenSt, nsrc);
  QOP_trace("test 46\n");
	    //mom_meq_force(P7, Prhonumu, sig);
	  }
	  /* Add the force F_rho the 2(4) link in the path: +     */
	  //P7rho = hw_qdp[rho];
	  //u_shift_hw_fermion(P7, P7rho, rho, temp_hw[rho]);
  QOP_trace("test 47\n");
	  u_shift_color_vecs(P7, P7rho, rho, nsrc, P7tmp[rho]);
  QOP_trace("test 48\n");
	  side_link_forces(rho,sig,SevenSt,Pnumu,P7,Prhonumu,P7rho, nsrc);
  QOP_trace("test 49\n");
	  //side_link_3f_force2(rho,sig,Pnumu,P7,Prhonumu,P7rho);
	  /* Add the P7rho vector to P5 */
	  /* The ratio SevenSt/FiveSt is used as the weight; a zero FiveSt
	     gives weight 0 instead of dividing by zero. */
	  for(i=0; i<nsrc; i++) {
	    if(FiveSt[i]!=0) coeff[i] = SevenSt[i]/FiveSt[i];
	    else coeff[i] = 0;
  QOP_trace("test 410\n");
	    QDP_V_peq_r_times_V(P5[sig][i], &coeff[i], P7rho[i], QDP_all);
  QOP_trace("test 411\n");
	  }
	} /* sig */
      } /* rho */
  QOP_trace("test 5\n");
/* P5nu reuses the P7 buffers */
#define P5nu P7
      for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) &&
				   (sig!=nu)&&(sig!=OPP_DIR(nu)) ) {
	/* Length 5 paths */
	/* Add the force F_nu the 1(3) link in the path: -     */
	//P5nu = hw_qdp[nu];
	//u_shift_hw_fermion(P5[sig], P5nu, nu, temp_hw[nu]);
	u_shift_color_vecs(P5[sig], P5nu, nu, nsrc, P5tmp[sig][nu]);
	side_link_forces(nu, sig, mFiveSt, Pmu, P5[sig], Pnumu, P5nu, nsrc);
	/* Add the P5nu vector to P3 */
	/* weight FiveSt/ThreeSt, or 0 when ThreeSt is zero */
	for(i=0; i<nsrc; i++) {
	  if(ThreeSt[i]!=0) coeff[i] = FiveSt[i]/ThreeSt[i]; 
	  else coeff[i] = 0;
	  QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all);
	}
      } /* sig */
    } /* nu */

/* Buffer aliases for the remaining terms: each name below reuses one of the
   temporaries allocated above (Pnumu, Pnumutmp, Prhonumu, Prhonumutmp, P7). */
#define Pmumu Pnumu
#define Pmumutmp Pnumutmp
#define P5sig Prhonumu
#define P5sigtmp Prhonumutmp
#define P3mu P7
#define Popmu P7
#define Pmumumu P7
    /* Now the Lepage term... It is the same as 5-link paths with
       nu=mu and FiveSt=Lepage. */
    //u_shift_hw_fermion(Pmu, Pmumu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)] );
    u_shift_color_vecs(Pmu, Pmumu, OPP_DIR(mu), nsrc, Pmutmp[OPP_DIR(mu)]);

    for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) {
      //P5sig = hw_qdp[sig];
      //u_shift_hw_fermion(Pmumu, P5sig, sig, temp_hw[sig]);
      u_shift_color_vecs(Pmumu, P5sig, sig, nsrc, Pmumutmp[sig]);
      if(GOES_FORWARDS(sig)) {
	/* Add the force F_sig[x+mu+nu]:      x--+             *
	 *                                   |   |             *
	 *                                   o   o             *
	 * the 2 link in the path: + (numbering starts from 0) */
	add_forces_to_mom(P5sig, Pmumu, sig, Lepage, nsrc);
      }
      /* Add the force F_nu the 1(3) link in the path: -     */
      //P5nu = hw_qdp[mu];
      //u_shift_hw_fermion(P5sig, P5nu, mu, temp_hw[mu]);
      u_shift_color_vecs(P5sig, P5nu, mu, nsrc, P5sigtmp[mu]);
      side_link_forces(mu, sig, mLepage, Pmu, P5sig, Pmumu, P5nu, nsrc);
      /* Add the P5nu vector to P3 */
      /* weight Lepage/ThreeSt, or 0 when ThreeSt is zero */
      for(i=0; i<nsrc; i++) {
	if(ThreeSt[i]!=0) coeff[i] = Lepage[i]/ThreeSt[i];
	else coeff[i] = 0;
	QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all);
      }

      /* Length 3 paths (Not the Naik term) */
      /* Add the force F_mu the 0(2) link in the path: +     */
      if(GOES_FORWARDS(mu)) {
	//P3mu = hw_qdp[mu];  /* OK to clobber P5nu */
	//u_shift_hw_fermion(P3[sig], P3mu, mu, temp_hw[mu]);
	//u_shift_color_vecs(P3[sig], P3mu, mu, 2, temp_hw[mu]);
	/* P3[sig] is copied into P5sig first and the copy is shifted */
	for(i=0; i<nsrc; i++) {
	  QDP_V_eq_V(P5sig[i], P3[sig][i], QDP_all);
	}
	u_shift_color_vecs(P5sig, P3mu, mu, nsrc, P5sigtmp[mu]);
      }
      /* The above shift is not needed if mu is backwards */
      side_link_forces(mu, sig, ThreeSt, xin, P3[sig], Pmu, P3mu, nsrc);
    }

    /* Finally the OneLink and the Naik term */
    if(GOES_BACKWARDS(mu)) {
      /* Do only the forward terms in the Dslash */
      /* Because I have shifted with OPP_DIR(mu) Pmu is a forward *
       * shift.                                                   */
      /* The one link */
      add_forces_to_mom(Pmu, xin, OPP_DIR(mu), OneLink, nsrc);
      /* For the same reason Pmumu is the forward double link */

      /* Popmu is a backward shift */
      //Popmu = hw_qdp[mu]; /* OK to clobber P3mu */
      //u_shift_hw_fermion(xin, Popmu, mu, temp_hw[mu]);
      u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]);
      /* The Naik */
      /* link no 1: - */
      add_forces_to_mom(Pmumu, Popmu, OPP_DIR(mu), mNaik, nsrc);
      /* Pmumumu can overwrite Popmu which is no longer needed */
      //Pmumumu = hw_qdp[OPP_DIR(mu)];
      //u_shift_hw_fermion(Pmumu, Pmumumu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)]);
      u_shift_color_vecs(Pmumu, Pmumumu, OPP_DIR(mu), nsrc, Pmumutmp[OPP_DIR(mu)]);
      /* link no 0: + */
      add_forces_to_mom(Pmumumu, xin, OPP_DIR(mu), Naik, nsrc);
    } else {
      /* The rest of the Naik terms */
      //Popmu = hw_qdp[mu]; /* OK to clobber P3mu */
      //u_shift_hw_fermion(xin, Popmu, mu, temp_hw[mu]);
      u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]);
      /* link no 2: + */
      /* Pmumu is double backward shift */
      add_forces_to_mom(Popmu, Pmumu, mu, Naik, nsrc);
    }
    /* Here we have to do together the Naik term and the one link term */

  }/* mu */
  QOP_trace("test 6\n");
  QOP_trace("test 7\n");

  /* Flip the sign on the odd subset again (matching the flip applied when
     tempmom_qdp was set up) and project onto the antihermitian part. */
  for(mu=0; mu<4; mu++) {
    QDP_M_eq_M(tmpmat, tempmom_qdp[mu], QDP_even);
    QDP_M_eqm_M(tmpmat, tempmom_qdp[mu], QDP_odd);
    QDP_M_eq_antiherm_M(tempmom_qdp[mu], tmpmat, QDP_all);
  }
  QDP_destroy_M(tmpmat);

  //printf("%p\n", P5tmp[0][4][0]); fflush(stdout);
  //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
  /* Free temporary vectors */
  for(i=0; i<nsrc; i++) {
    QDP_destroy_V(Pmu[i]);
    QDP_destroy_V(Pnumu[i]);
    QDP_destroy_V(Prhonumu[i]);
    QDP_destroy_V(P7[i]);
    QDP_destroy_V(P7rho[i]);
    //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
    for(dir=0; dir<8; dir++) {
      QDP_destroy_V(xintmp[dir][i]);
      QDP_destroy_V(Pmutmp[dir][i]);
      QDP_destroy_V(Pnumutmp[dir][i]);
      QDP_destroy_V(Prhonumutmp[dir][i]);
      QDP_destroy_V(P7tmp[dir][i]);
    }
    //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
    /* Only the P5s / P5tmps pool is destroyed; P5 / P5tmp merely alias it. */
    for(mu=0; mu<4; mu++) {
      //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
      QDP_destroy_V(P5s[mu][i]);
      //QDP_destroy_V(P5[mu][i]);
      //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
      for(dir=0; dir<8; dir++) {
	//if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
	QDP_destroy_V(P5tmps[mu][dir][i]);
	//printf("%p\n", P5tmp[mu][dir][i]); fflush(stdout);
	//QDP_destroy_V(P5tmp[mu][dir][i]);
	//if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
      }
      //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
    }
    //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
  }

  //if(QDP_this_node==0) { printf("here3\n"); fflush(stdout); }
  for(mu=0; mu<8; mu++) {
    for(i=0; i<nsrc; i++) {
      QDP_destroy_V(P3[mu][i]);
    }
    //QDP_destroy_V(P5[mu][0]);
    //QDP_destroy_V(P5[mu][1]);
  }

  for(i=0; i<nsrc; i++) {
    QDP_destroy_V(tv[i]);
  }

  //if(QDP_this_node==0) { printf("here4\n"); fflush(stdout); }
  /* NOTE(review): presumably fblink[4..7] are exactly the backward links
     created in the setup loop, i.e. OPP_DIR maps 0..3 onto 4..7 and
     QOP_common.ndim is 4 - confirm. */
  for(i=4; i<8; i++) {
    QDP_destroy_M(fblink[i]);
  }

  dtime += QOP_time();
  info->final_sec = dtime;
  info->final_flop = nflop*QDP_sites_on_node;
  info->status = QOP_SUCCESS;

  ASQTAD_FORCE_END;
#undef NC
}
コード例 #19
0
ファイル: f_mu_nu1.c プロジェクト: erinaldi/milc_qcd
       and multiply the two corners together in the two different ways */
    /* Note f_mn is here used as a temporary! */
    wait_gather(tag0);
    wait_gather(tag1);
    FORALLSITES(i,s){
        mult_su3_na( (su3_matrix *)(gen_pt[0][i]),
	    (su3_matrix *)(gen_pt[1][i]), ((su3_matrix *)F_PT(s,f_mn)) );
        mult_su3_nn( &(s->tempmat1), ((su3_matrix *)F_PT(s,f_mn)),
	    &(s->tempmat2) );
	mult_su3_nn( ((su3_matrix *)F_PT(s,f_mn)), &(s->tempmat1),
	    &(s->staple) );
    }

    /* tempmat2 is the plaquette +mu -nu and must be gathered from -nu */
    tag2 = start_gather_site( F_OFFSET(tempmat2), sizeof(su3_matrix),
	OPP_DIR(nu), EVENANDODD, gen_pt[2] );

    /* staple is the plaquette -mu +nu and must be gather from -mu */
    tag3 = start_gather_site( F_OFFSET(staple), sizeof(su3_matrix),
	OPP_DIR(mu), EVENANDODD, gen_pt[3] );

    /* Now make +mu +nu plaquette and put in f_mn */
    FORALLSITES(i,s){
        mult_su3_nn( &(s->link[mu]), ((su3_matrix *)F_PT(s,f_mn)), &tmat4 );
        mult_su3_na( &tmat4, &(s->link[nu]), ((su3_matrix *)F_PT(s,f_mn)) );
    }

    /* Now gather +mu -nu plaquette and add to f_mn */
    wait_gather(tag2);
    FORALLSITES(i,s){
	add_su3_matrix( ((su3_matrix *)F_PT(s,f_mn)),
コード例 #20
0
ファイル: nl_spectrum.c プロジェクト: erinaldi/milc_qcd
int nl_spectrum( Real vmass, field_offset temp1, field_offset temp2 ) { 
  /* return the C.G. iteration number */
  double *piprop,*pi2prop,*rhoprop,*rho2prop,*barprop;
  double *nlpiprop,*nlpi2prop,*ckpiprop,*ckpi2prop;
  double *delprop,*ckbarprop;
  Real vmass_x2;
  site* s;
  register complex cc;
  Real finalrsq;
  register int i,x,y,z,t,icol,cgn;
  register int t_source,t_off;
  int dir,isrc;

  msg_tag *mtag[16];

  piprop = (double *)malloc( nt*sizeof(double) );
  pi2prop = (double *)malloc( nt*sizeof(double) );
  rhoprop = (double *)malloc( nt*sizeof(double) );
  rho2prop = (double *)malloc( nt*sizeof(double) );
  barprop = (double *)malloc( nt*sizeof(double) );
  nlpiprop = (double *)malloc( nt*sizeof(double) );
  nlpi2prop = (double *)malloc( nt*sizeof(double) );
  ckpiprop = (double *)malloc( nt*sizeof(double) );
  ckpi2prop = (double *)malloc( nt*sizeof(double) );
  delprop = (double *)malloc( nt*sizeof(double) );
  ckbarprop = (double *)malloc( nt*sizeof(double) );

  for( t=0; t<nt; t++ ){
    piprop[t]=0.0; pi2prop[t]=0.0; rhoprop[t]=0.0; rho2prop[t]=0.0;
    nlpiprop[t]=0.0; nlpi2prop[t]=0.0;
    ckpiprop[t]=0.0; ckpi2prop[t]=0.0;
    barprop[t]=0.0; delprop[t]=0.0; ckbarprop[t]=0.0;
  }

  vmass_x2 = 2.*vmass;
  cgn=0;

  /* Fix TUP Coulomb gauge - gauge links only*/
  rephase( OFF );
  gaugefix(TUP,(Real)1.8,500,(Real)GAUGE_FIX_TOL);
  rephase( ON );
#ifdef FN
  invalidate_all_ferm_links(&fn_links);
#endif

  /* Unlike spectrum.c, here we calculate only with wall sources */
  for(t_source=source_start, isrc=0; t_source<2*nt && isrc < n_sources;
        ++isrc, t_source += source_inc ) {
      
      /* Only work for even source slices */
      if( t_source%2 != 0 ){
	printf("DUMMY:  Use even time slices for nl_spectrum()\n");
	terminate(0);
      }

      /* Compute propagator from even wall sites */
      /* Sources are normalized to 1/8 to make them comparable to */
      /* propagators from a wall with ones on the cube origin. */
      /* Put result in propmat */
      
      for(icol=0; icol<3; icol++) {
	  
	  /* initialize temp1 and temp2 */
	  clear_latvec( temp1, EVEN);
	  clear_latvec( temp2, EVEN);
	  
	  for(x=0;x<nx;x++)for(y=0;y<ny;y++)for(z=0;z<nz;z++) {
	      if((x+y+z) % 2 == 0) {
		  if( node_number(x,y,z,t_source) != mynode() )continue;
		  i=node_index(x,y,z,t_source);
		  ((su3_vector *)(F_PT(&lattice[i],temp1)))->c[icol].real = 
		    -0.25;
		}
	    }
	  
	  /* do a C.G. */
	  load_ferm_links(&fn_links);
	  cgn += congrad(niter,rsqprop,EVEN,&finalrsq, &fn_links);
	  /* Multiply by -Madjoint */
	  dslash_site( temp2, temp2, ODD, &fn_links);
	  scalar_mult_latvec( temp2, -vmass_x2, temp2, EVEN);
	  
	  /* fill the hadron matrix */
	  copy_latvec( temp2, F_OFFSET(propmat[icol]), EVENANDODD);
	} /* end loop on icol */
      
      
      /* Compute propagator from odd wall sites */
      /* Put result in propmat2 */
      
      for(icol=0; icol<3; icol++) {
	  
	  /* initialize temp1 and temp2 */
	  clear_latvec( temp1, ODD);
	  clear_latvec( temp2, ODD);
	  for(x=0;x<nx;x++)for(y=0;y<ny;y++)for(z=0;z<nz;z++) {
	      if((x+y+z) % 2 == 1) {
		  if( node_number(x,y,z,t_source) != mynode() )continue;
		  i=node_index(x,y,z,t_source);
		  ((su3_vector *)(F_PT(&lattice[i],temp1)))->c[icol].real = 
		    -0.25;
		}
	    }
	  
	  /* do a C.G. */
	  load_ferm_links(&fn_links);
	  cgn += congrad(niter,rsqprop,ODD,&finalrsq,&fn_links);
	  /* Multiply by -Madjoint */
	  dslash_site( temp2, temp2, EVEN, &fn_links);
	  scalar_mult_latvec( temp2, -vmass_x2, temp2, ODD);
	  
	  /* fill the hadron matrix */
	  copy_latvec( temp2, F_OFFSET(propmat2[icol]), EVENANDODD);
	} /* end loop on icol */
      
      /* cgn now gives the sum for both inversions */
      
      
      /* measure the meson propagator for the E wall source */
      for(t=0; t<nt; t++) {
	  /* define the time value offset t from t_source */
	  t_off = (t+t_source)%nt;
	  
	  for(x=0;x<nx;x++)for(y=0;y<ny;y++)for(z=0;z<nz;z++)
	    for(icol=0;icol<3;icol++) {
		if( node_number(x,y,z,t_off) != mynode() )continue;
		i=node_index(x,y,z,t_off);
		cc = su3_dot( &lattice[i].propmat[icol],
			     &lattice[i].propmat[icol] );
		
		piprop[t] += cc.real;
		/* (rhoprop and rho2prop are not generated by this source) */
		
		if( (x+y+z)%2==0)pi2prop[t] += cc.real;
		else	     pi2prop[t] -= cc.real;
		
	      }
	  
	} /* nt-loop */
      
      /* measure the baryon propagator for the E wall source */
      for(t=0; t<nt; t++) {
	  /* define the time value offset t from t_source */
	  t_off = (t+t_source)%nt;
	  
	  for(x=0;x<nx;x+=2)for(y=0;y<ny;y+=2)for(z=0;z<nz;z+=2) {
	      if( node_number(x,y,z,t_off) != mynode() )continue;
	      i=node_index(x,y,z,t_off);
	      cc = det_su3( (su3_matrix *)(lattice[i].propmat) );
	      barprop[t] += cc.real;
	  }
	  
	  /* must get sign right.  This looks to see if we have
	     wrapped around the lattice.  "t" is the distance
	     from the source to the measurement, so we are
	     trying to find out if t_source+t is greater than
	     or equal to nt. */
	  if( (((t+t_source)/nt-t_source/nt)%2) == 1 )barprop[t] *= -1.0;
	  /* change sign because antiperiodic b.c.  sink point
	     should really be in a copy of the lattice */
	} /* nt-loop */
      
      
      /* Measure nonlocal (and some local for checking) propagators    */
      /* These propagators include the delta and some nonlocal mesons  */
      /* The method for the delta is described in M.F.L. Golterman and */
      /* J. Smit, Nucl. Phys. B 255, 328 (1985)                        */
      /* Equation (6.3) defines the sink operator for the delta        */
      /* The method for the mesons is described in M.F.L. Golterman    */
      /* Nucl. Phys. B 273, 663 (1986)                                 */
      /* The treatment of the source wall is described in Gupta,       */
      /* Guralnik, Kilcup, and Sharpe, (GGKS) NSF-ITP-90-172 (1990)    */
      /* To get the delta propagator, we take the "q" propagator       */
      /* matrices for each of the wall colors and antisymmetrize over  */
      /* wall color as well as s                    */
      
      /* First construct the "q" and "o" propagators                   */
      /* Put q = E + O in propmat and o = E - O in propmat2 */
      
      FORALLSITES(i,s) {
	  for(icol=0; icol<3; icol++) {
	      add_su3_vector (&(s->propmat[icol]), &(s->propmat2[icol]), 
			      (su3_vector *)(s->tempmat1.e[icol]) );
	      sub_su3_vector (&(s->propmat[icol]), &(s->propmat2[icol]), 
			      &(s->propmat2[icol]) );
	      su3vec_copy( (su3_vector *)(s->tempmat1.e[icol]),
		&(s->propmat[icol]) );
	    }
	}
      
      
      
      /* Next gather the propagators in preparation for calculating   */
      /* shifted propagators Dq and Do                                */
      
      FORALLUPDIRBUT(TUP,dir) {
	  /* Start bringing "q" = propmat from forward sites    */
	  
	  mtag[dir] = start_gather_site(F_OFFSET(propmat[0]), 
		   sizeof(su3_matrix), dir, EVENANDODD, gen_pt[dir]);
	  
	  /* Start bringing "q" from backward neighbors       */
	  
	  mtag[dir+4] = start_gather_site(F_OFFSET(propmat[0]), 
		   sizeof(su3_matrix), OPP_DIR(dir), EVENANDODD,
		   gen_pt[dir+4]);
	  wait_gather(mtag[dir]);
	  wait_gather(mtag[dir+4]);
	  
	  /* Start bringing "o" = propmat2 from forward sites   */
	  
	  mtag[8+dir] = start_gather_site(F_OFFSET(propmat2[0]), 
		   sizeof(su3_matrix), dir, EVENANDODD, gen_pt[8+dir]);
	  
	      /* Start bringing "o" from backward neighbors       */
	  
	  mtag[8+dir+4] = start_gather_site(F_OFFSET(propmat2[0]), 
		    sizeof(su3_matrix), OPP_DIR(dir), EVENANDODD,
		    gen_pt[8+dir+4]);
	  wait_gather(mtag[8+dir]);
	  wait_gather(mtag[8+dir+4]);
	  
	}
      
      
      /* Calculate and dump delta propagator */
      for(t=0; t<nt; t++) {
	  /* define the time value offset t from t_source */
	  t_off = (t+t_source)%nt;
	  
	  /* Calculate contribution for each permutation of source color */
	  delta_prop (0,1,2, 1, t_off, &delprop[t]);
	  delta_prop (1,2,0, 1, t_off, &delprop[t]);
	  delta_prop (2,0,1, 1, t_off, &delprop[t]);
	  delta_prop (1,0,2,-1, t_off, &delprop[t]);
	  delta_prop (0,2,1,-1, t_off, &delprop[t]);
	  delta_prop (2,1,0,-1, t_off, &delprop[t]);
	  
	  if( (((t+t_source)/nt-t_source/nt)%2) == 1 ) delprop[t] *= -1;
	} /* nt-loop */
      
      /* Calculate the "q" source nucleon as a check */
      
      /* Calculate and dump nucleon check propagator */
      for(t=0; t<nt; t++) {
	  /* define the time value offset t from t_source */
	  t_off = (t+t_source)%nt;
	  
	  for(x=0;x<nx;x+=2)for(y=0;y<ny;y+=2)for(z=0;z<nz;z+=2) {
	      if( node_number(x,y,z,t_off) != mynode() )continue;
	      i=node_index(x,y,z,t_off);
	      /* The q propagator is in propmat */
	      cc = det_su3( (su3_matrix *)(lattice[i].propmat) );
	      ckbarprop[t] += cc.real;
	    }
	  
	  if( (((t+t_source)/nt-t_source/nt)%2) == 1 )ckbarprop[t]*= -1.0;
	  /* change sign because antiperiodic b.c.  sink point
	     should really be in a copy of the lattice */
	} /* nt-loop */
      
      /* Calculate nonlocal meson propagators and local check */
      for(t=0; t<nt; t++) {
	  /* Calculate two nonlocal pion propagators */
	  /* These are pi_1 and pi_1 tilde of Gupta et al */
	  /* Also calculate two local propagators as a check */
	  
	  /* define the time value offset t from t_source */
	  t_off = (t+t_source)%nt;
	  
	  nl_meson_prop(t_off,&nlpiprop[t],&nlpi2prop[t],&ckpiprop[t],
	     &ckpi2prop[t]);
	  
	} /* nt-loop */
      
      /* Clean up gathers */
      FORALLUPDIRBUT(TUP,dir) {
	  cleanup_gather(mtag[dir]);
	  cleanup_gather(mtag[dir+4]);
	  cleanup_gather(mtag[8+dir]);
	  cleanup_gather(mtag[8+dir+4]);
	}
コード例 #21
0
ファイル: staple.c プロジェクト: aacarosso/aacmilc
  FORALLSITES(i, s)
    mult_su3_an((su3_matrix*)F_PT(s,lnk2), (su3_matrix*)F_PT(s,lnk1),
                tempmat1 + i);

   wait_gather(tag0);
   wait_gather(tag1);

  // Finish lower staple
  FORALLSITES(i, s) {
    mult_su3_nn(tempmat1 + i, (su3_matrix *)gen_pt[0][i], &tmat1);
    su3mat_copy(&tmat1, tempmat1 + i);
  }

  // Gather staple from direction -dir2 to "home" site
  tag2 = start_gather_field(tempmat1, sizeof(su3_matrix),
                      OPP_DIR(dir2), EVENANDODD, gen_pt[2]);

  // Calculate upper staple, add it
  FORALLSITES(i, s) {
    mult_su3_nn((su3_matrix*)F_PT(s,lnk2), (su3_matrix *)gen_pt[1][i], &tmat1);
    mult_su3_na(&tmat1, (su3_matrix *)gen_pt[0][i], &tmat2);
    add_su3_matrix(stp + i, &tmat2, stp + i);
  }

  // Finally add the lower staple
  wait_gather(tag2);
  FORALLSITES(i, s)
    add_su3_matrix(stp+i, (su3_matrix *)gen_pt[2][i], stp+i);

  cleanup_gather(tag0);
  cleanup_gather(tag1);
コード例 #22
0
ファイル: utilities.c プロジェクト: daschaich/fourfermion
// -----------------------------------------------------------------
// Matrix--vector operation
// Applies either the operator (sign = 1) or its adjoint (sign = -1)
// Adjoint is simply overall negative sign...
void fermion_op(vector *src, vector *dest, int sign) {
  register int i;
  register site *s;
  int dir, a, b, c, d, par, L[NDIMS] = {nx, ny, nz, nt};
  Real tr, halfG = 0.5 * G, m_ov_G, vev[DIMF][DIMF];
  vector tvec, tvec_dir, tvec_opp;
  msg_tag *tag[2 * NDIMS];

  // Quick sanity check
  if (sign != 1 && sign != -1) {
    node0_printf("Error: incorrect sign in fermion_op: %d\n", sign);
    terminate(1);
  }

  // Ignore site_mass if G = 0 to avoid dividing by zero
  // Could be made more robust, but unlikely to matter
  if (G == 0.0)
    m_ov_G = 0.0;
  else
    m_ov_G = 2.0 * site_mass / G;
  for (a = 0; a < DIMF; a++) {
    for (b = 0; b < DIMF; b++)
      vev[a][b] = 0.0;
  }
  vev[0][1] = m_ov_G;
  vev[2][3] = m_ov_G;
  vev[1][0] = -m_ov_G;
  vev[3][2] = -m_ov_G;

  // Start gathers for kinetic term
  FORALLUPDIR(dir) {
    if (L[dir] <= 1)              // Will be skipped below
      continue;

    tag[dir] = start_gather_field(src, sizeof(vector), dir,
                                  EVENANDODD, gen_pt[dir]);
    tag[OPP_DIR(dir)] = start_gather_field(src, sizeof(vector), OPP_DIR(dir),
                                           EVENANDODD, gen_pt[OPP_DIR(dir)]);
  }

  // Compute scalar term as gathers run
  // Initialize dest = 0.5G * (sigma + 2m / G) * src
  // Add SO(4)-breaking 'site mass' term with same structure as sigma
  FORALLSITES(i, s) {
    clearvec(&(dest[i]));
    if (stagger == -1 || lattice[i].parity == EVEN)
      par = 1;
    else              // Both stagger == 1 and lattice[i].parity == ODD
      par = -1;

    for (a = 0; a < DIMF; a++) {
      for (b = a + 1; b < DIMF; b++) {
        tr = s->sigma.e[as_index[a][b]] + par * vev[a][b];
        for (c = 0; c < DIMF; c++) {
          for (d = c + 1; d < DIMF; d++) {
            tr += perm[a][b][c][d] * (s->sigma.e[as_index[c][d]]
                                      + par * vev[c][d]);
          }
        }   // No half since not double-counting
        dest[i].c[a] += tr * src[i].c[b];
        dest[i].c[b] -= tr * src[i].c[a];
      }
    }
    scalar_mult_vec(&(dest[i]), halfG, &(dest[i]));
  }
コード例 #23
0
ファイル: quark_stuff_hisq.c プロジェクト: erinaldi/milc_qcd
/*
 * add_basic_path
 *
 * Expand one "basic" path into all of its images under axis permutations
 * and reflections, and append every image that is not already present to
 * the path table.
 *
 *   this_q_paths      table of paths being built (entries 0 ..
 *                     path_table_index-1 are taken as already filled)
 *   path_table_index  first free slot in the table when called
 *   basic_vec         list of directions making up the basic path; each
 *                     entry is used as an index into an 8-entry direction
 *                     map, so it must lie in 0..7
 *   length            number of entries of basic_vec that are used;
 *                     must satisfy 0 <= length <= MAX_LENGTH
 *   coeff             coefficient of the basic path; stored as +coeff, or
 *                     as -coeff when the image reflects direction 0
 *   max_paths         capacity of this_q_paths
 *
 * Returns the number of entries appended.  path_table_index is passed by
 * value, so the caller's own index is not advanced here.
 *
 * Fatal conditions (bad length, table full) print a message via
 * node0_printf and terminate the process with a non-zero exit status.
 */
static int 
add_basic_path( Q_path *this_q_paths, int path_table_index, 
		int *basic_vec, int length, Real coeff, int max_paths ) {

    int perm[4],pp[8],ir[4];
    int j,path_num;
    int vec[MAX_LENGTH];
    int flag;

    /* vec[] holds exactly MAX_LENGTH entries: a longer path would overrun
       it, and a negative length would make the NODIR padding loop below
       start writing before vec[0]. */
    if(length < 0 || length > MAX_LENGTH){
	node0_printf("add_basic_path: path length %d outside 0..%d\n",
		     length, (int)MAX_LENGTH);
	exit(1);
    }

    path_num = 0;  /* number of paths made from this basic path so far */

    /* Now fill the long table with all rotations and reflections
       of the fundamental path.  The path presented to us is for
       the positive x component of dslash, so if the x coordinate
       is reflected it will appear with a negative sign. */

    /* permutations: scan all 4^4 index tuples, keep those whose four
       entries are pairwise distinct */
    for(perm[0]=0;perm[0]<4;perm[0]++)
    for(perm[1]=0;perm[1]<4;perm[1]++)
    for(perm[2]=0;perm[2]<4;perm[2]++)
    for(perm[3]=0;perm[3]<4;perm[3]++){
	if(perm[0] == perm[1] || perm[0] == perm[2] 
	  || perm[0] == perm[3] || perm[1] == perm[2]
	  || perm[1] == perm[3] || perm[2] == perm[3] ) continue;

	/* reflections: ir[j]==1 means direction j is flipped */
	for(ir[0]=0;ir[0]<2;ir[0]++)
	for(ir[1]=0;ir[1]<2;ir[1]++)
	for(ir[2]=0;ir[2]<2;ir[2]++)
	for(ir[3]=0;ir[3]<2;ir[3]++){

	    /* build the direction map pp[]: image of j, and of its
	       opposite, under this permutation + reflection */
	    for(j=0;j<4;j++){
		pp[j]=perm[j];
		if(ir[j] == 1) pp[j]=OPP_DIR(pp[j]);
		pp[OPP_DIR(j)]=OPP_DIR(pp[j]);
	    }

	    /* create new vector; remember to pad with NODIR's, or the
	       comparison below will get confused */
	    for(j=0;j<length;j++) vec[j]=pp[basic_vec[j]];
	    for(j=length;j<MAX_LENGTH;j++) vec[j]=NODIR;

	    /* check if it's a new path: is_path_equal() is treated as
	       returning 1 on a match */
	    flag=0;
	    for(j=0;j<path_table_index;j++){
		flag = is_path_equal( vec, this_q_paths[j].dir, MAX_LENGTH );
		if(flag==1)break;
	    }
	    if(flag != 0) continue;   /* already in the table */

	    if(path_table_index>=max_paths){
		node0_printf("OOPS: MAX_NUM too small\n");
		/* this is a fatal error: do not report success */
		exit(1);
	    }

	    this_q_paths[path_table_index].length=length;
	    for(j=0;j<MAX_LENGTH;j++) {
		this_q_paths[path_table_index].dir[j]=vec[j];
	    }

	    /* sign and forward/backward flag follow the reflection of
	       direction 0 */
	    if(ir[0]==0){
		this_q_paths[path_table_index].coeff =  coeff;
		this_q_paths[path_table_index].forwback =  +1;
	    }
	    else{
		this_q_paths[path_table_index].coeff = -coeff;
		this_q_paths[path_table_index].forwback = -1;
	    }
	    path_table_index++;
	    path_num++;

	} /* end reflection */
    } /* end permutation */

    return(path_num);
} /* add_basic_path */
コード例 #24
0
ファイル: utilities.c プロジェクト: daschaich/fourfermion
      // Add link mass operator to dest
      switch(dir) {
        case XUP: par = s->x; break;
        case YUP: par = s->y; break;
        case ZUP: par = s->z; break;
        case TUP: par = s->t; break;
      }
      tr = 0.5 * link_mass * s->phase[dir];
      if (par % 2 != 0)
        tr *= -1;

      add_vec(&tvec_dir, &tvec_opp, &tvec);
      scalar_mult_add_vec(&(dest[i]), &tvec, tr, &(dest[i]));
    }
    cleanup_gather(tag[dir]);
    cleanup_gather(tag[OPP_DIR(dir)]);
  }

  // Overall negative sign for adjoint
  if (sign == -1) {
    FORALLSITES(i, s)
      scalar_mult_vec(&(dest[i]), -1.0, &(dest[i]));
  }
}
// -----------------------------------------------------------------



// -----------------------------------------------------------------
// Squared four-fermion matrix--vector operation
//   dest = D^2 src
コード例 #25
0
/*
 * Accumulate a gauge-action force into force->force[mu], mu = 0..3.
 *
 * Outline of what the code below does:
 *   1. Builds fblink[]: fblink[i] aliases gauge->links[i]; fblink[OPP_DIR(i)]
 *      is a newly created field holding the adjoint of that link shifted
 *      backward along i.
 *   2. For each mu, sums products of links ("staples") into tempmom_qdp[mu]
 *      (and, for some rectangle terms, into tempmom_qdp[nu]), weighted by
 *      coeffs->plaquette, coeffs->rectangle and coeffs->parallelogram.
 *   3. For each mu forms  eb3 * U_mu * adj(tempmom_qdp[mu]) + force->force[mu]
 *      with eb3 = -eps/3, and stores the result of QDP_M_eq_antiherm_M of
 *      that back into force->force[mu].
 *   4. Destroys all temporaries and fills info (elapsed time, flop count,
 *      status).
 *
 * Parameters:
 *   info    receives final_sec, final_flop and status = QOP_SUCCESS
 *   gauge   source of the links (gauge->links[i]); not written here
 *   force   force->force[mu] is read and overwritten (accumulated into)
 *   coeffs  supplies the plaquette / rectangle / parallelogram weights
 *   eps     step size; enters only through eb3 = -eps/3
 *
 * NOTE(review): fblink is not declared in this function; presumably a
 * file-scope array with at least 8 entries -- confirm.
 * NOTE(review): the set-up loop runs over QOP_common.ndim while every other
 * loop is hard-coded to 4 directions, and the clean-up frees fblink[4..7];
 * this looks like it assumes ndim == 4 and OPP_DIR mapping 0..3 onto 4..7 --
 * confirm.
 */
void 
QOPPC(symanzik_1loop_gauge_force1) (QOP_info_t *info, QOP_GaugeField *gauge, 
		   QOP_Force *force, QOP_gauge_coeffs_t *coeffs, REAL eps)
{
  REAL Plaq, Rect, Pgm ;
  QDP_ColorMatrix *tempmom_qdp[4];
  QDP_ColorMatrix *Amu[6]; // products of 2 links Unu(x)*Umu(x+nu)
  QDP_ColorMatrix *tmpmat;
  QDP_ColorMatrix *tmpmat1;
  QDP_ColorMatrix *tmpmat2;
  QDP_ColorMatrix *staples;
  QDP_ColorMatrix *tmpmat3;
  QDP_ColorMatrix *tmpmat4;

  int i, k;
  int mu, nu, sig;
  double dtime;
  //REAL eb3 = -eps*beta/3.0;
  REAL eb3 = -eps/3.0;
  // j[i] lists the two slot numbers in 0..2 other than i; used below as
  // Amu[j[i][k]] to pick the Amu slot belonging to direction sig
  int j[3][2] = {{1,2},
                 {0,2},
                 {0,1}};
  
  //  QOP_printf0("beta: %e, eb3: %e\n", beta, eb3);
  // start the timer: dtime becomes the elapsed time once QOP_time() is
  // added back at the end
  dtime = -QOP_time();

  // per-direction accumulators for the staple sums, zero-initialised
  for(mu=0; mu<4; mu++) {
    tempmom_qdp[mu] = QDP_create_M();
    QDP_M_eq_zero(tempmom_qdp[mu], QDP_all);
  }

  // forward links alias the caller's gauge field (not copies); backward
  // links are new fields: adjoint of the link shifted backward along i
  tmpmat = QDP_create_M();
  for(i=0; i<QOP_common.ndim; i++) {
    fblink[i] = gauge->links[i];
    fblink[OPP_DIR(i)] = QDP_create_M();
    QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all);
    QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all);
  }
  

  for(i=0; i<6; i++) {
    Amu[i] = QDP_create_M();
  }

  staples = QDP_create_M();
  tmpmat1 = QDP_create_M();
  tmpmat2 = QDP_create_M();
  tmpmat3 = QDP_create_M();
  tmpmat4 = QDP_create_M();

  Plaq = coeffs->plaquette;
  Rect = coeffs->rectangle;
  Pgm  = coeffs->parallelogram;

  //Construct 3-staples and rectangles
  for(mu=0; mu<4; mu++) {
    // i counts the directions nu != mu in increasing order (0..2);
    // Amu[i] is the two-link product through +nu, Amu[i+3] through -nu
    i=0;
    for(nu=0; nu<4; nu++) {
      if(nu!=mu){
	// tmpmat1 = Umu(x+nu)
	QDP_M_eq_sM(tmpmat1, fblink[mu], QDP_neighbor[nu], QDP_forward, QDP_all); 
        QDP_M_eq_M_times_M(Amu[i], fblink[nu], tmpmat1, QDP_all);

        //tmpmat2 = Umu(x-nu)
	QDP_M_eq_sM(tmpmat2, fblink[mu], QDP_neighbor[nu], QDP_backward, QDP_all);
        QDP_M_eq_M_times_M(Amu[i+3], fblink[OPP_DIR(nu)], tmpmat2, QDP_all);
       

 
	// --- staple through +nu: plaquette term, then rectangle terms ---
	// (tmpmat1, tmpmat2 and staples set here are reused further down;
	//  the statement order matters)
	//tmpmat = U_{nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_Ma(staples, Amu[i], tmpmat, QDP_all);        
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Plaq, staples, QDP_all);
 
        //tmpmat = U_{-nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_Ma_times_M(tmpmat3, fblink[OPP_DIR(nu)], staples, QDP_all);
        QDP_M_eq_M_times_M(tmpmat4, tmpmat3, tmpmat, QDP_all);
        QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_forward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Rect, tmpmat, QDP_all);

        // rectangle contribution that lands on the nu-direction accumulator
        QDP_M_eq_Ma_times_M(tmpmat4, tmpmat2, tmpmat3, QDP_all);
        QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_forward, QDP_all);
        QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[mu], QDP_backward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat3, QDP_all);

        //tmpmat = U_{-nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_Ma(tmpmat3, tmpmat2, tmpmat, QDP_all);
        QDP_M_eq_M_times_Ma(tmpmat, tmpmat3, staples, QDP_all);        
        QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[nu], QDP_forward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat3, QDP_all);




	// --- staple through -nu: plaquette term, then rectangle terms ---
        //tmpmat = U_{-nu}(x+mu) 
        QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_Ma(staples, Amu[i+3], tmpmat, QDP_all);        
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Plaq, staples, QDP_all);

        QDP_M_eq_Ma_times_M(tmpmat3, fblink[nu], staples, QDP_all);
        QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_M(tmpmat4, tmpmat3, tmpmat, QDP_all);
        QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_backward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Rect, tmpmat, QDP_all);

        // rectangle contributions that land on the nu-direction accumulator
        QDP_M_eq_Ma_times_M(tmpmat, tmpmat3, tmpmat1, QDP_all);
        QDP_M_eq_sM(tmpmat4, tmpmat, QDP_neighbor[mu], QDP_backward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat4, QDP_all);

        QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_M(tmpmat3, staples, tmpmat, QDP_all);
        QDP_M_eq_M_times_Ma(tmpmat4, tmpmat3, tmpmat1, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat4, QDP_all);
        i++;
      }
      
    }

    // Construct the  pgm staples and add them to force
    // (staples is reset and now accumulates the sum over all nu, sig;
    //  it is scaled by Pgm once, after the loops)
    QDP_M_eq_zero(staples, QDP_all);
    i=0;
    for(nu=0; nu<4; nu++){
      if(nu!=mu){
        // k counts the directions sig distinct from both mu and nu (0..1)
        k=0;
	for(sig=0; sig<4;sig ++){
	  if(sig!=mu && nu!=sig){
	    
	    // the nu_sig_mu ... staple and 3 reflections
            //tmpmat = Amu["sig"](x+nu)
	    QDP_M_eq_sM(tmpmat, Amu[j[i][k]], QDP_neighbor[nu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["sig"](x+nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[nu], tmpmat, QDP_all);   
            //tmpmat3 = Unu(x+mu+sig)
            QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
	    QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_forward, QDP_all); // HERE?
            //tmpmat2 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig))
	    QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = Usig(x+mu)
	    QDP_M_eq_sM(tmpmat, fblink[sig], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig))*adj(Usig(x+mu))
	    QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);

	    QDP_M_peq_M(staples, tmpmat1, QDP_all);


            //tmpmat = Amu["sig"](x-nu)
	    QDP_M_eq_sM(tmpmat, Amu[j[i][k]], QDP_neighbor[nu], QDP_backward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["sig"](x-nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[OPP_DIR(nu)], tmpmat, QDP_all);   
            //tmpmat3 = U_{-nu}(x+mu+sig)
            QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
	    QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_forward, QDP_all); // HERE?
            //tmpmat2 = U_{-nu}(x)*Amu["sig"](x-nu)*adj(U_{-nu}(x+mu+sig))
	    QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = Usig(x+mu)
	    QDP_M_eq_sM(tmpmat, fblink[sig], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["sig"](x-nu)*adj(U_{-nu}(x+mu+sig))*adj(Usig(x+mu))
	    QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);

	    QDP_M_peq_M(staples, tmpmat1, QDP_all);


            //tmpmat = Amu["-sig"](x-nu)
	    QDP_M_eq_sM(tmpmat, Amu[j[i][k]+3], QDP_neighbor[nu], QDP_backward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["-sig"](x-nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[OPP_DIR(nu)], tmpmat, QDP_all);   
            //tmpmat3 = U_{-nu}(x+mu-sig)
            QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
	    QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_backward, QDP_all); // HERE?
            //tmpmat2 = U_{-nu}(x)*Amu["-sig"](x-nu)*adj(U_{-nu}(x+mu-sig))
	    QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = U_{-sig}(x+mu)
	    QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(sig)], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["-sig"](x-nu)*adj(U_{-nu}(x+mu-sig))*adj(U_{-sig}(x+mu))
	    QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);

	    QDP_M_peq_M(staples, tmpmat1, QDP_all);

            


            //tmpmat = Amu["-sig"](x+nu)
	    QDP_M_eq_sM(tmpmat, Amu[j[i][k]+3], QDP_neighbor[nu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["-sig"](x+nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[nu], tmpmat, QDP_all);   
            //tmpmat3 = Unu(x+mu-sig)
            QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
	    QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_backward, QDP_all); // HERE?
            //tmpmat2 = Unu(x)*Amu["-sig"](x+nu)*adj(Unu(x+mu-sig))
	    QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = U_{-sig}(x+mu)
	    QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(sig)], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["-sig"](x+nu)*adj(Unu(x+mu-sig))*adj(U_{-sig}(x+mu))
	    QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);

	    QDP_M_peq_M(staples, tmpmat1, QDP_all);

	    k++;
	  }//close if sig!=nu ...
	}//close sig loop
	i++;
      }// close if nu!=mu
    }//close the pgm nu loop

    QDP_M_peq_r_times_M(tempmom_qdp[mu], &Pgm, staples, QDP_all);
   

    
  }// closes the mu loop

#ifdef CHKSUM
  // optional diagnostic: accumulates det(global sum of force[mu]) over mu
  QLA_ColorMatrix qcm;
  QLA_Complex det, chk;
  QLA_c_eq_r(chk, 0);
#endif
  // combine with the incoming force:
  //   tmpmat          = U_mu * adj(tempmom_qdp[mu])
  //   tempmom_qdp[mu] = eb3 * tmpmat + force->force[mu]
  //   force->force[mu] = result of QDP_M_eq_antiherm_M on tempmom_qdp[mu]
  for(mu=0; mu<4; mu++){
    QDP_M_eq_M_times_Ma(tmpmat, fblink[mu], tempmom_qdp[mu], QDP_all); // HERE?
    QDP_M_eq_r_times_M_plus_M( tempmom_qdp[mu], &eb3, tmpmat, force->force[mu], QDP_all);// HERE?
    QDP_M_eq_antiherm_M(force->force[mu], tempmom_qdp[mu], QDP_all);// HERE
#ifdef CHKSUM
    QDP_m_eq_sum_M(&qcm, force->force[mu], QDP_all);
    QLA_C_eq_det_M(&det, &qcm);
    QLA_c_peq_c(chk, det);
#endif
  }
#ifdef CHKSUM
  QOP_printf0("chksum: %g %g\n", QLA_real(chk), QLA_imag(chk));
#endif

  //DESTROY various fields

  QDP_destroy_M(tmpmat);
  QDP_destroy_M(tmpmat1);
  QDP_destroy_M(tmpmat2);
  QDP_destroy_M(tmpmat3);
  QDP_destroy_M(staples);
  QDP_destroy_M(tmpmat4);

  for(mu=0; mu<4; mu++){
    QDP_destroy_M(tempmom_qdp[mu]);
  }
  for(i=0; i<6; i++) {
    QDP_destroy_M(Amu[i]);
  }

  // only the backward links were created here; fblink[0..3] alias the
  // caller's gauge field and are not destroyed
  // NOTE(review): assumes the backward links sit in fblink[4..7] -- confirm
  for(i=4; i<8; i++) {
    QDP_destroy_M(fblink[i]);
  }

  dtime += QOP_time();

  // hard-coded flop figure, scaled by the number of sites on this node
  double nflop = 96720;
  info->final_sec = dtime;
  info->final_flop = nflop*QDP_sites_on_node; 
  info->status = QOP_SUCCESS;
  //QOP_printf0("Time in slow g_force: %e\n", info->final_sec);
} 
コード例 #26
0
ファイル: fermion_links_hyp.c プロジェクト: erinaldi/milc_qcd
/* compute wiggly links in direction dir1 decorated in direction dir2 */
void hyp_block_stage1(register int dir1, register int dir2, int parity,
  su3_matrix *U_link, su3_matrix *Wiggly_link, int dir_exclude,
  hyp_coeffs_t *hc ) {

  register int i;
  register site *st;
  msg_tag *tag0,*tag1,*tag2,*tag3,*tag4;
  int start;
  register int count,nWiggly;
  su3_matrix tmat1,tmat2,fatq;
  su3_matrix *tempmat1;
  int disp[4];	/* displacement vector for general gather */

  /* create temporary storage, one matrix per site */
  tempmat1 = create_mn_special(1);

  start=1; /* indicates staple sum not initialized */

  // array size is fixed for 4D, in 3D not all entries are filled
  nWiggly = 12;

  count=3*dir1+dir2;
  if(dir2>dir1)count=count-1; 

  /* displacement vector for link 2 sites away */
  for(i=XUP;i<=TUP;i++)disp[i]=0;
  disp[dir1] = 1;
  disp[dir2] = -1;
  
  /* get U_link[dir2] from direction dir1 */
  tag0 = declare_strided_gather( U_link + dir2, 4*sizeof(su3_matrix),
           sizeof(su3_matrix), dir1, parity, gen_pt[0] );
  do_gather( tag0 );
  
  /* get U_link[dir1] from direction dir2 */
  tag1 = declare_strided_gather( U_link + dir1, 4*sizeof(su3_matrix),
           sizeof(su3_matrix), dir2, parity, gen_pt[1] );
  do_gather( tag1 );
  
  /* get U_link[dir2] from direction -dir2 */
  tag2 = declare_strided_gather( U_link + dir2, 4*sizeof(su3_matrix),
           sizeof(su3_matrix), OPP_DIR(dir2), parity, gen_pt[2] );
  do_gather( tag2 );
  
  /* get U_link[dir1] from direction -dir2 */
  tag3 = declare_strided_gather( U_link + dir1, 4*sizeof(su3_matrix),
           sizeof(su3_matrix), OPP_DIR(dir2), parity, gen_pt[3] );
  do_gather( tag3 );
  
  /* get U_link[dir2] from displacement +dir1-dir2 */
  tag4 = start_general_strided_gather( (char *)(U_link + dir2), 4*sizeof(su3_matrix),
           sizeof(su3_matrix), disp, parity, gen_pt[4] );

  /* Upper staple */
  wait_gather(tag0);
  wait_gather(tag1);
  if(start){  /* this is the first contribution to staple */
    FORSOMEPARITY(i,st,parity){
      mult_su3_nn( &(U_link[4*i+dir2]), (su3_matrix *)gen_pt[1][i], &tmat1 );
      mult_su3_na( &tmat1, (su3_matrix *)gen_pt[0][i], &(tempmat1[i]) );
      
    }
    start=0; 
  }
コード例 #27
0
ファイル: dslash_fn2.c プロジェクト: goyalankit/milc
/* D_slash routine - sets dest. on each site equal to sum of
   sources parallel transported to site, with minus sign for transport
   from negative directions.  Use "fatlinks" for one link transport,
   "longlinks" for three link transport. */
void dslash_fn( field_offset src, field_offset dest, int parity ) {
   register int i;
   register site *s;
   register int dir,otherparity;
   register su3_matrix *fat4, *long4;
   msg_tag *tag[16];

    if(!valid_longlinks)load_longlinks();
    if(!valid_fatlinks)load_fatlinks();
    switch(parity){
	case EVEN:	otherparity=ODD; break;
	case ODD:	otherparity=EVEN; break;
	case EVENANDODD:	otherparity=EVENANDODD; break;
    }

    /* Start gathers from positive directions */
    /* And start the 3-step gather too */
    for( dir=XUP; dir<=TUP; dir++ ){
	tag[dir] = start_gather( src, sizeof(su3_vector), dir, parity,
	    gen_pt[dir] );
	tag[DIR3(dir)] = start_gather( src, sizeof(su3_vector), DIR3(dir),
	    parity, gen_pt[DIR3(dir)] );
    }

    /* Multiply by adjoint matrix at other sites */
    /* Use fat link for single link transport */
    FORSOMEPARITY( i, s, otherparity ){
      if( i < loopend-FETCH_UP ){
#ifdef DSLASH_TMP_LINKS
	fat4 = &(t_fatlink[4*(i+FETCH_UP)]);
	long4 = &(t_longlink[4*(i+FETCH_UP)]);
#else
	fat4 = (s+FETCH_UP)->fatlink;
	long4 = (s+FETCH_UP)->longlink;
#endif
	prefetch_4MV4V( 
		       fat4,
		       (su3_vector *)F_PT(s+FETCH_UP,src),
		       (s+FETCH_UP)->tempvec );
	prefetch_4MV4V(
		       long4,
		       (su3_vector *)F_PT(s+FETCH_UP,src),
		       (s+FETCH_UP)->templongvec );
      }

#ifdef DSLASH_TMP_LINKS
      fat4 = &(t_fatlink[4*i]);
      long4 = &(t_longlink[4*i]);
#else
      fat4 = s->fatlink;
      long4 = s->longlink;
#endif
	mult_adj_su3_mat_vec_4dir( fat4,
	    (su3_vector *)F_PT(s,src), s->tempvec );
	/* multiply by 3-link matrices too */
	mult_adj_su3_mat_vec_4dir( long4,
	    (su3_vector *)F_PT(s,src), s->templongvec );
    } END_LOOP

    /* Start gathers from negative directions */
    for( dir=XUP; dir <= TUP; dir++){
	tag[OPP_DIR(dir)] = start_gather( F_OFFSET(tempvec[dir]),
	    sizeof(su3_vector), OPP_DIR( dir), parity,
	    gen_pt[OPP_DIR(dir)] );
    }

    /* Start 3-neighbour gathers from negative directions */
    for( dir=X3UP; dir <= T3UP; dir++){
	tag[OPP_3_DIR(dir)] 
           = start_gather( F_OFFSET(templongvec[INDEX_3RD(dir)]),
			   sizeof(su3_vector), OPP_3_DIR( dir), parity,
			   gen_pt[OPP_3_DIR(dir)] );
    }

    /* Wait gathers from positive directions, multiply by matrix and
	accumulate */
    /* wait for the 3-neighbours from positive directions, multiply */
    for(dir=XUP; dir<=TUP; dir++){
	wait_gather(tag[dir]);
	wait_gather(tag[DIR3(dir)]);
    }
    /* Wait gathers from negative directions, accumulate (negative) */
    /* and the same for the negative 3-rd neighbours */
    for(dir=XUP; dir<=TUP; dir++){
	wait_gather(tag[OPP_DIR(dir)]);
    }
    for(dir=X3UP; dir<=T3UP; dir++){
	wait_gather(tag[OPP_3_DIR(dir)]);
    }


    FORSOMEPARITY(i,s,parity){
#ifdef DSLASH_TMP_LINKS
      fat4 = &(t_fatlink[4*i]);
      long4 = &(t_longlink[4*i]);
#else
      fat4 = s->fatlink;
      long4 = s->longlink;
#endif
      mult_su3_mat_vec_sum_4dir( fat4,
	    (su3_vector *)gen_pt[XUP][i], (su3_vector *)gen_pt[YUP][i],
	    (su3_vector *)gen_pt[ZUP][i], (su3_vector *)gen_pt[TUP][i],
	    (su3_vector *)F_PT(s,dest));

      mult_su3_mat_vec_sum_4dir( long4,
	    (su3_vector *)gen_pt[X3UP][i], (su3_vector *)gen_pt[Y3UP][i],
	    (su3_vector *)gen_pt[Z3UP][i], (su3_vector *)gen_pt[T3UP][i],
	    (su3_vector *) &(s->templongv1));

      if( i < loopend-FETCH_UP ){
#ifdef DSLASH_TMP_LINKS
	fat4 = &(t_fatlink[4*(i+FETCH_UP)]);
	long4 = &(t_longlink[4*(i+FETCH_UP)]);
#else
	fat4 = (s+FETCH_UP)->fatlink;
	long4 = (s+FETCH_UP)->longlink;
#endif
	prefetch_4MVVVV( 
              fat4,
	      (su3_vector *)gen_pt[XUP][i+FETCH_UP],
              (su3_vector *)gen_pt[YUP][i+FETCH_UP],
              (su3_vector *)gen_pt[ZUP][i+FETCH_UP],
              (su3_vector *)gen_pt[TUP][i+FETCH_UP] );
	prefetch_4MVVVV( 
              long4,
              (su3_vector *)gen_pt[X3UP][i+FETCH_UP],
              (su3_vector *)gen_pt[Y3UP][i+FETCH_UP],
              (su3_vector *)gen_pt[Z3UP][i+FETCH_UP],
              (su3_vector *)gen_pt[T3UP][i+FETCH_UP] );
	prefetch_VVVV( 
              (su3_vector *)gen_pt[XDOWN][i+FETCH_UP],
              (su3_vector *)gen_pt[YDOWN][i+FETCH_UP],
              (su3_vector *)gen_pt[ZDOWN][i+FETCH_UP],
              (su3_vector *)gen_pt[TDOWN][i+FETCH_UP] );
	prefetch_VVVV( 
              (su3_vector *)gen_pt[X3DOWN][i+FETCH_UP],
              (su3_vector *)gen_pt[Y3DOWN][i+FETCH_UP],
              (su3_vector *)gen_pt[Z3DOWN][i+FETCH_UP],
              (su3_vector *)gen_pt[T3DOWN][i+FETCH_UP] );
        }

        sub_four_su3_vecs( (su3_vector *)F_PT(s,dest),
	    (su3_vector *)(gen_pt[XDOWN][i]),
	    (su3_vector *)(gen_pt[YDOWN][i]),
	    (su3_vector *)(gen_pt[ZDOWN][i]),
	    (su3_vector *)(gen_pt[TDOWN][i]) );
        sub_four_su3_vecs( &(s->templongv1), 
	    (su3_vector *)(gen_pt[X3DOWN][i]),
	    (su3_vector *)(gen_pt[Y3DOWN][i]),
	    (su3_vector *)(gen_pt[Z3DOWN][i]),
	    (su3_vector *)(gen_pt[T3DOWN][i]) );
        /* Now need to add these things together */
        add_su3_vector((su3_vector *)F_PT(s,dest), & (s->templongv1),
			           (su3_vector *)F_PT(s,dest));
    } END_LOOP
コード例 #28
0
ファイル: fuz_prop.c プロジェクト: erinaldi/milc_qcd
		}

		wait_gather(tag1);
/*
		copy_latvec( (field_offset)gen_pt[1], F_OFFSET(resid),
		    EVENANDODD);
*/
		FORALLSITES(i,s) {
		    su3vec_copy((su3_vector *)gen_pt[1][i], &(s->resid));
		}
		FORALLSITES(i,s) {
		    mult_adj_su3_mat_vec(&(s->link[dir]), &(s->resid),
			&(s->ttt));
		}
		restart_gather_site(F_OFFSET(ttt), sizeof(su3_vector),
		    OPP_DIR(dir), EVENANDODD, gen_pt[1], tag1);

	    } /* k<r0 */

	    wait_gather(tag0);
	    FORALLSITES(i,s) {
		mult_su3_mat_vec_sum(&(s->link[dir]),
		    (su3_vector *)(gen_pt[0][i]), (su3_vector *)F_PT(s,fprop));
	    }
	    cleanup_gather(tag0);

	    wait_gather(tag1);
	    FORALLSITES(i,s) {
		add_su3_vector((su3_vector *)(gen_pt[1][i]),
		    (su3_vector *)F_PT(s,fprop), (su3_vector *)F_PT(s,fprop));
	    }
コード例 #29
0
ファイル: dslash_fn.c プロジェクト: erinaldi/milc_qcd
/* Special dslash for use by congrad.  Uses restart_gather_field() when
   possible.  Third to last argument is an array of message tags, to be
   set if this is the first use, otherwise reused.  If start=1, use
   start_gather_field, otherwise use restart_gather_field.
   The calling program must clean up the gathers and temps!

   src, dest : input and output vectors, indexed by site index
   parity    : parity of the sites of dest that are written
   tag       : message tags, indexed by gather direction
   start     : 1 to start new gathers (and fill tag); any other value
               restarts the gathers already recorded in tag
   fn        : source of the fat and long links; must not be NULL

   The work vectors temp[0..8] are declared outside this function and
   are allocated here on the first call only (temp_not_allocated). */
void dslash_fn_field_special(su3_vector *src, su3_vector *dest,
			     int parity, msg_tag **tag, int start,
			     fn_links_t *fn){
  register int i;
  register site *s;
  register int dir,otherparity=0;
  register su3_matrix *fat4;
  /* Declared unconditionally: long4 is named in the private() lists of
     the site loops below even when NO_LONG_LINKS is defined. */
  register su3_matrix *long4;
  su3_matrix *t_fatlink;
#ifndef NO_LONG_LINKS
  su3_matrix *t_longlink;
#endif
  
  /* allocate temporary work space only if not already allocated */
  if(temp_not_allocated)
    {
      for( dir=XUP; dir<=TUP; dir++ ){
	temp[dir]  =(su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
	temp[dir+4]=(su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
	if(temp[dir] == NULL || temp[dir+4] == NULL){
	  printf("dslash_fn_field_special: no room for temporaries\n");
	  terminate(1);
	}
      }
      temp[8]=(su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
      if(temp[8] == NULL){
	printf("dslash_fn_field_special: no room for temporaries\n");
	terminate(1);
      }
      temp_not_allocated = 0 ;
    }
  
  /* load fatlinks and longlinks */
  if(fn == NULL){
    printf("dslash_fn_field_special: invalid fn links!\n");
    terminate(1);
  }
#ifndef NO_LONG_LINKS
  t_longlink = get_lnglinks(fn);
#endif
  t_fatlink = get_fatlinks(fn);

  switch(parity)
    {
    case EVEN:	otherparity=ODD; break;
    case ODD:	otherparity=EVEN; break;
    case EVENANDODD:	otherparity=EVENANDODD; break;
    }
  
  /* Start gathers from positive directions */
  /* And start the 3-step gather too */
  for( dir=XUP; dir<=TUP; dir++ ){
    if(start==1)
      {
	tag[dir] = start_gather_field( src, sizeof(su3_vector), 
					   dir, parity,gen_pt[dir] );
#ifndef NO_LONG_LINKS
	tag[DIR3(dir)] = start_gather_field(src, sizeof(su3_vector),
						DIR3(dir),parity, 
						gen_pt[DIR3(dir)] );
#endif
      }
    else
      {
	restart_gather_field( src, sizeof(su3_vector), 
				  dir, parity,gen_pt[dir], tag[dir]);
#ifndef NO_LONG_LINKS
	restart_gather_field(src, sizeof(su3_vector), DIR3(dir), parity, 
				 gen_pt[DIR3(dir)], tag[DIR3(dir)]);
#endif
      }
  }
  
  /* Multiply by adjoint matrix at other sites */
  /* Use fat link for single link transport */
  /* (Software prefetching is not used in this field version.) */
  FORSOMEPARITYDOMAIN_OMP( i, s, otherparity, private(fat4,long4) ){
    fat4 = &(t_fatlink[4*i]);
    mult_adj_su3_mat_4vec( fat4, &(src[i]), &(temp[0][i]),
			   &(temp[1][i]), &(temp[2][i]), &(temp[3][i]) );
#ifndef NO_LONG_LINKS
    /* multiply by 3-link matrices too */
    long4 = &(t_longlink[4*i]);
    mult_adj_su3_mat_4vec( long4, &(src[i]),&(temp[4][i]),
			   &(temp[5][i]), &(temp[6][i]), &(temp[7][i]) );
#endif
  } END_LOOP_OMP
      
  /* Start gathers from negative directions */
  for( dir=XUP; dir <= TUP; dir++){
    if (start==1) tag[OPP_DIR(dir)] = start_gather_field( temp[dir],
	  sizeof(su3_vector), OPP_DIR( dir), parity, gen_pt[OPP_DIR(dir)] );
    else restart_gather_field( temp[dir], sizeof(su3_vector), 
	   OPP_DIR( dir), parity, gen_pt[OPP_DIR(dir)], tag[OPP_DIR(dir)] );
  }

  /* Start 3-neighbour gathers from negative directions */
  /* NOTE(review): these gathers (and the matching waits below) are not
     conditional on NO_LONG_LINKS, although temp[4..7] are only filled
     when long links are enabled.  They are left as they were because
     the caller's cleanup of these tags is not visible from here. */
  for( dir=X3UP; dir <= T3UP; dir++){
    if (start==1) tag[OPP_3_DIR(dir)]=start_gather_field(
		 temp[INDEX_3RD(dir)+4], sizeof(su3_vector), 
		 OPP_3_DIR( dir), parity, gen_pt[OPP_3_DIR(dir)] );
    else restart_gather_field(temp[INDEX_3RD(dir)+4], 
	      sizeof(su3_vector), OPP_3_DIR( dir),parity, 
	      gen_pt[OPP_3_DIR(dir)], tag[OPP_3_DIR(dir)] );
  }

  /* Wait gathers from positive directions, multiply by matrix and
     accumulate */
  /* wait for the 3-neighbours from positive directions, multiply */
  for(dir=XUP; dir<=TUP; dir++){
    wait_gather(tag[dir]);
#ifndef NO_LONG_LINKS
    wait_gather(tag[DIR3(dir)]);
#endif
  }
  
  FORSOMEPARITYDOMAIN_OMP(i,s,parity, private(fat4,long4) ){
    fat4 = &(t_fatlink[4*i]);
    mult_su3_mat_vec_sum_4dir( fat4,
	       (su3_vector *)gen_pt[XUP][i], (su3_vector *)gen_pt[YUP][i],
	       (su3_vector *)gen_pt[ZUP][i], (su3_vector *)gen_pt[TUP][i],
	       &(dest[i]) );
    
#ifndef NO_LONG_LINKS
    /* long-link contribution is accumulated separately in temp[8] */
    long4 = &(t_longlink[4*i]);
    mult_su3_mat_vec_sum_4dir( long4,
	    (su3_vector *)gen_pt[X3UP][i], (su3_vector *)gen_pt[Y3UP][i],
	    (su3_vector *)gen_pt[Z3UP][i], (su3_vector *)gen_pt[T3UP][i],
	    &(temp[8][i]));
#endif
  } END_LOOP_OMP
   
  /* Wait gathers from negative directions, accumulate (negative) */
  /* and the same for the negative 3-rd neighbours */
  for(dir=XUP; dir<=TUP; dir++){
    wait_gather(tag[OPP_DIR(dir)]);
  }
  for(dir=X3UP; dir<=T3UP; dir++){
    wait_gather(tag[OPP_3_DIR(dir)]);
  }
  
  FORSOMEPARITYDOMAIN_OMP(i,s,parity, ){
    sub_four_su3_vecs( &(dest[i]),
		       (su3_vector *)(gen_pt[XDOWN][i]),
		       (su3_vector *)(gen_pt[YDOWN][i]),
		       (su3_vector *)(gen_pt[ZDOWN][i]),
		       (su3_vector *)(gen_pt[TDOWN][i]) );
#ifndef NO_LONG_LINKS
    /* temp[8] is only written when long links are enabled, so the
       long-link part must not be folded into dest otherwise. */
    sub_four_su3_vecs( &(temp[8][i]), 
		       (su3_vector *)(gen_pt[X3DOWN][i]),
		       (su3_vector *)(gen_pt[Y3DOWN][i]),
		       (su3_vector *)(gen_pt[Z3DOWN][i]),
		       (su3_vector *)(gen_pt[T3DOWN][i]) );
    /* Now need to add these things together */
    add_su3_vector(&(dest[i]), &(temp[8][i]),&(dest[i]));
#endif
  } END_LOOP_OMP
      
}
コード例 #30
0
ファイル: dslash_fn.c プロジェクト: erinaldi/milc_qcd
/* Special dslash_site for use by congrad.  Uses restart_gather_site() when
   possible.  Third to last argument is an array of message tags, to be set
   if this is the first use, otherwise reused.  If start=1, use
   start_gather_site, otherwise use restart_gather_site.
   The calling program must clean up the gathers!

   src, dest : offsets of the input and output vectors in the site struct
   parity    : parity of the sites whose dest is written
   tag       : message tags, indexed by gather direction
   start     : 1 to declare/start new gathers (and fill tag); any other
               value re-runs the gathers already recorded in tag
   fn        : source of the fat and long links; must not be NULL

   The work buffers are allocated on the first call and kept for the
   lifetime of the program.  With start != 1 the negative-direction
   gathers are re-run with do_gather() on tags that were declared in an
   earlier call, so the buffer those tags were declared with has to be
   the same one that is filled here.  Not safe to call concurrently. */
void dslash_fn_site_special( field_offset src, field_offset dest,
			     int parity, msg_tag **tag, int start,
			     fn_links_t *fn){
  register int i;
  register site *s;
  register int dir,otherparity=0;
  register su3_matrix *fat4;
  /* Declared unconditionally: long4 is named in the private() lists of
     the site loops below even when NO_LONG_LINKS is defined. */
  register su3_matrix *long4;
  su3_matrix *t_fatlink;
#ifndef NO_LONG_LINKS
  su3_matrix *t_longlink;
  /* persistent work space, four vectors per site / one vector per site */
  static su3_vector *templongvec = NULL, *templongv1 = NULL;
#endif
  /* persistent work space, four vectors per site */
  static su3_vector *tempvec = NULL;
  char myname[] = "dslash_fn_site_special";
  
  if(fn == NULL){
    printf("dslash_fn_site_special: invalid fn links!\n");
    terminate(1);
  }
#ifndef NO_LONG_LINKS
  t_longlink = get_lnglinks(fn);
#endif
  t_fatlink = get_fatlinks(fn);

  /* allocate the work buffers only once, so repeated calls neither leak
     them nor move them out from under previously declared gathers */
  if(tempvec == NULL){
    tempvec = (su3_vector *) malloc(sizeof(su3_vector)*4*sites_on_node);
    if(tempvec == NULL){
      printf("%s(%d)No room for temporary\n",myname, this_node);
      terminate(1);
    }
  }
  
#ifndef NO_LONG_LINKS
  if(templongvec == NULL){
    templongvec = (su3_vector *) malloc(sizeof(su3_vector)*4*sites_on_node);
    if(templongvec == NULL){
      printf("%s(%d)No room for temporary\n",myname, this_node);
      terminate(1);
    }
  }
  
  if(templongv1 == NULL){
    templongv1 = create_v_field();
    if(templongv1 == NULL){
      printf("%s(%d)No room for temporary\n",myname, this_node);
      terminate(1);
    }
  }
#endif
  
  switch(parity){
  case EVEN:	otherparity=ODD; break;
  case ODD:	otherparity=EVEN; break;
  case EVENANDODD:	otherparity=EVENANDODD; break;
  }
  
  /* Start gathers from positive directions */
  for(dir=XUP; dir<=TUP; dir++){
    if(start==1) tag[dir] = start_gather_site( src, sizeof(su3_vector),
					       dir, parity, gen_pt[dir] );
    else restart_gather_site( src, sizeof(su3_vector),
			      dir, parity, gen_pt[dir] , tag[dir] );
  }
  
  /* and start the 3rd neighbor gather */
  /* NOTE(review): started (and waited for below) even when
     NO_LONG_LINKS is defined; left as it was because the caller's
     cleanup of these tags is not visible from here. */
  for(dir=X3UP; dir<=T3UP; dir++){
    if(start==1) tag[dir] = start_gather_site( src, sizeof(su3_vector),
					       dir, parity, gen_pt[dir] );
    else restart_gather_site( src, sizeof(su3_vector),
			      dir, parity, gen_pt[dir] , tag[dir] ); 
  }
  
  /* Multiply by adjoint matrix at other sites */
  FORSOMEPARITYDOMAIN_OMP(i,s,otherparity,private(fat4,long4)){
    if( i < loopend-FETCH_UP ){
      fat4 = &(t_fatlink[4*(i+FETCH_UP)]);
      /* the work buffers hold four vectors per site, so the group for
	 site i+FETCH_UP starts at 4*(i+FETCH_UP) */
      prefetch_4MV4V( 
		     fat4,
		     (su3_vector *)F_PT(s+FETCH_UP,src),
		     tempvec+4*(i+FETCH_UP) );
#ifndef NO_LONG_LINKS
      long4 = &(t_longlink[4*(i+FETCH_UP)]);
      prefetch_4MV4V(
		     long4,
		     (su3_vector *)F_PT(s+FETCH_UP,src),
		     templongvec+4*(i+FETCH_UP) );
#endif
    }
    
    fat4 = &(t_fatlink[4*i]);
#ifndef NO_LONG_LINKS
    long4 = &(t_longlink[4*i]);
#endif
    mult_adj_su3_mat_vec_4dir( fat4,
			       (su3_vector *)F_PT(s,src), (tempvec+4*i) );
    /* multiply by 3-link matrices too */
#ifndef NO_LONG_LINKS
    mult_adj_su3_mat_vec_4dir( long4,
			       (su3_vector *)F_PT(s,src), (templongvec+4*i) );
#endif
  } END_LOOP_OMP
      
  /* Start gathers from negative directions */
  for( dir=XUP; dir <= TUP; dir++){
    if (start==1){
      /* We need the strided gather so we can pick off one of a
	 group of four vectors in tempvec */
      tag[OPP_DIR(dir)] = 
	declare_strided_gather( (char *)(tempvec+dir), 4*sizeof(su3_vector), 
				sizeof(su3_vector), OPP_DIR( dir), parity, 
				gen_pt[OPP_DIR(dir)] );
      prepare_gather(tag[OPP_DIR(dir)]);
      do_gather(tag[OPP_DIR(dir)]);
    }	else {
      /* re-run the gather declared earlier on the same tempvec */
      do_gather(tag[OPP_DIR(dir)]);
    }
  }
  
#ifndef NO_LONG_LINKS
  /* and 3rd neighbours */
  for( dir=X3UP; dir <= T3UP; dir++){
    if (start==1){
      tag[OPP_3_DIR(dir)] = 
	declare_strided_gather( (char *)(templongvec+INDEX_3RD(dir)), 
				4*sizeof(su3_vector), 
				sizeof(su3_vector), OPP_3_DIR(dir), 
				parity, gen_pt[OPP_3_DIR(dir)] );
      prepare_gather(tag[OPP_3_DIR(dir)]);
      do_gather(tag[OPP_3_DIR(dir)]);
    }	else {
      /* re-run the gather declared earlier on the same templongvec */
      do_gather(tag[OPP_3_DIR(dir)]);
    }
  }
#endif
  
  /* Wait gathers from positive directions, multiply by matrix and
     accumulate */
  for(dir=XUP; dir<=TUP; dir++){
    wait_gather(tag[dir]);
  }
  
  /* wait for the 3-neighbours from positive directions, multiply */
  for(dir=X3UP; dir<=T3UP; dir++){
    wait_gather(tag[dir]);
  }
  FORSOMEPARITYDOMAIN_OMP(i,s,parity, private(fat4,long4) ){
    if( i < loopend-FETCH_UP ){
      fat4 = &(t_fatlink[4*(i+FETCH_UP)]);
      prefetch_4MVVVV( 
		      fat4,
		      (su3_vector *)gen_pt[XUP][i+FETCH_UP],
		      (su3_vector *)gen_pt[YUP][i+FETCH_UP],
		      (su3_vector *)gen_pt[ZUP][i+FETCH_UP],
		      (su3_vector *)gen_pt[TUP][i+FETCH_UP] );
#ifndef NO_LONG_LINKS
      long4 = &(t_longlink[4*(i+FETCH_UP)]);
      prefetch_VV(
		  (su3_vector *)F_PT(s+FETCH_UP,dest),
		  templongv1+i+FETCH_UP);
      prefetch_4MVVVV( 
		      long4,
		      (su3_vector *)gen_pt[X3UP][i+FETCH_UP],
		      (su3_vector *)gen_pt[Y3UP][i+FETCH_UP],
		      (su3_vector *)gen_pt[Z3UP][i+FETCH_UP],
		      (su3_vector *)gen_pt[T3UP][i+FETCH_UP] );
#endif
    }
    fat4 = &(t_fatlink[4*i]);
    mult_su3_mat_vec_sum_4dir( fat4,
	       (su3_vector *)gen_pt[XUP][i], (su3_vector *)gen_pt[YUP][i],
	       (su3_vector *)gen_pt[ZUP][i], (su3_vector *)gen_pt[TUP][i],
	       (su3_vector *)F_PT(s,dest));
#ifndef NO_LONG_LINKS
    /* long-link contribution is accumulated separately in templongv1 */
    long4 = &(t_longlink[4*i]);
    mult_su3_mat_vec_sum_4dir( long4,
	       (su3_vector *)gen_pt[X3UP][i], (su3_vector *)gen_pt[Y3UP][i],
	       (su3_vector *)gen_pt[Z3UP][i], (su3_vector *)gen_pt[T3UP][i],
	       templongv1+i);
#endif
  } END_LOOP_OMP
      
  /* Wait gathers from negative directions, accumulate (negative) */
  for(dir=XUP; dir<=TUP; dir++){
    wait_gather(tag[OPP_DIR(dir)]);
  } 
  
#ifndef NO_LONG_LINKS
  /* and the same for the negative 3-rd neighbours; these gathers are
     only started when long links are enabled, so only wait then */
  for(dir=X3UP; dir<=T3UP; dir++){
    wait_gather(tag[OPP_3_DIR(dir)]);
  }
#endif
  
  FORSOMEPARITYDOMAIN_OMP(i,s,parity, ){
    if( i < loopend-FETCH_UP ){
#ifndef NO_LONG_LINKS
      prefetch_VV(
		  (su3_vector *)F_PT(s+FETCH_UP,dest),
		  templongv1+i+FETCH_UP);
#endif
      prefetch_VVVV( 
		    (su3_vector *)gen_pt[XDOWN][i+FETCH_UP],
		    (su3_vector *)gen_pt[YDOWN][i+FETCH_UP],
		    (su3_vector *)gen_pt[ZDOWN][i+FETCH_UP],
		    (su3_vector *)gen_pt[TDOWN][i+FETCH_UP] );
#ifndef NO_LONG_LINKS
      /* gen_pt[X3DOWN..T3DOWN] are only set up by the long-link
	 gathers above */
      prefetch_VVVV( 
		    (su3_vector *)gen_pt[X3DOWN][i+FETCH_UP],
		    (su3_vector *)gen_pt[Y3DOWN][i+FETCH_UP],
		    (su3_vector *)gen_pt[Z3DOWN][i+FETCH_UP],
		    (su3_vector *)gen_pt[T3DOWN][i+FETCH_UP] );
#endif
    }
    sub_four_su3_vecs( (su3_vector *)F_PT(s,dest),
		       (su3_vector *)(gen_pt[XDOWN][i]),
		       (su3_vector *)(gen_pt[YDOWN][i]),
		       (su3_vector *)(gen_pt[ZDOWN][i]),
		       (su3_vector *)(gen_pt[TDOWN][i]) );
#ifndef NO_LONG_LINKS
    sub_four_su3_vecs( templongv1+i,
		       (su3_vector *)(gen_pt[X3DOWN][i]),
		       (su3_vector *)(gen_pt[Y3DOWN][i]),
		       (su3_vector *)(gen_pt[Z3DOWN][i]),
		       (su3_vector *)(gen_pt[T3DOWN][i]) );
    /*** Now need to add these things together ***/
    add_su3_vector((su3_vector *)F_PT(s,dest), templongv1+i,
		   (su3_vector *)F_PT(s,dest));
#endif
  } END_LOOP_OMP
      
}