void
load_fatlinks_cpu(info_t *info, su3_matrix *fat, ks_component_paths *p, 
		  su3_matrix *links){
  register int i;
  int dir;
  register su3_matrix *fat1;
  su3_matrix *staple = NULL, *tempmat1 = NULL;
  char myname[] = "load_fatlinks_cpu";

#ifdef ASQ_OPTIMIZED_FATTENING
  int  nu,rho,sig ;
  Real one_link;
#else
  int ipath;
  int disp[4];
  int num_q_paths = p->num_q_paths;
  Q_path *q_paths = p->q_paths;
#endif

  double dtime = -dclock();

  staple = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix));
  if(staple == NULL){
    printf("%s: Can't malloc temporary\n",myname);
    terminate(1);
  }

  tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix));
  if(tempmat1 == NULL){
    printf("%s: Can't malloc temporary\n",myname);
    terminate(1);
  }

#ifndef  ASQ_OPTIMIZED_FATTENING   /* general case code */
  for (dir=XUP; dir<=TUP; dir++){ /* loop over fatlink directions */
    /* set fatlink to zero */
    FORALLFIELDSITES(i){
      fat1 = fat + 4*i + dir;
      clear_su3mat( fat1 );
    }
    
    /* loop over paths, checking for ones with total displacement 1*dir */
    for( ipath=0; ipath<num_q_paths; ipath++ ){  /* loop over paths */
	/* compute total displacement of path */
	for(i=XUP;i<=TUP;i++)disp[i]=0;
	for( i=0; i<q_paths[ipath].length; i++){
	  if( GOES_FORWARDS(q_paths[ipath].dir[i]) )
	    disp[        q_paths[ipath].dir[i]  ]++;
	  else
	    disp[OPP_DIR(q_paths[ipath].dir[i]) ]--;
	}
	for( disp[dir]+=1,i=XUP; i<=TUP; i++)if(disp[i]!=0)break;
	if( i<=TUP )continue;  /* skip if path doesn't go to right place */
/**printf("dir = %d, found a path:  ",dir);
for(j=0;j<q_paths.[ipath].length;j++)printf("\t%d", q_paths[ipath].dir[j]);
printf("\n");**/

//	path_product( q_paths[ipath].dir, q_paths[ipath].length, tempmat1 );
//	path_product_field( q_paths[ipath].dir, q_paths[ipath].length, 
//			    tempmat1, links );
	path_product_fields( links, q_paths[ipath].dir, q_paths[ipath].length, 
			     tempmat1 );
	FORALLFIELDSITES(i){
	  su3_adjoint( &tempmat1[i], &staple[i] );
	  fat1 = fat +  4*i + dir;
          scalar_mult_add_su3_matrix( fat1,
	    &staple[i], -q_paths[ipath].coeff, fat1 );
		/* minus sign in coeff. because we used backward path*/
	}
    } /* ipath */
  } /* loop over directions */
#else	/* ASQ_OPTIMIZED_FATTENING, for Asq and Asqtad actions */
/*  Optimized fattening code for the Asq and Asqtad actions.           *
 * I assume that path 0 is the one link path 2 the 3-staple            *
 * path 3 the 5-staple path 4 the 7-staple and path 5 the Lepage term. *
 * Path 1 is the Naik term.                                            */
 
 /* to fix up the Lepage term, included by a trick below */
 one_link = (p->act_path_coeff.one_link - 6.0*p->act_path_coeff.lepage);
 
 for (dir=XUP; dir<=TUP; dir++){
   FORALLFIELDSITES(i) /* Intialize fat links with c_1*U_\mu(x) */
     {
       fat1 = fat +  4*i + dir;
       scalar_mult_su3_matrix(links + 4*i + dir, one_link,
			      fat1 );
     }
   /* Skip the rest of the calculation if the remaining coefficients vanish */
   if( p->act_path_coeff.three_staple == 0.0 &&
       p->act_path_coeff.lepage == 0.0 &&
       p->act_path_coeff.five_staple == 0.0)continue;

   for(nu=XUP; nu<=TUP; nu++) if(nu!=dir)
     {
//       compute_gen_staple_site(staple,dir,nu,F_OFFSET(link[dir]),
//			       *t_fl, act_path_coeff.three_staple);

       compute_gen_staple_field(staple, dir, nu, links + dir, 4,
				fat, p->act_path_coeff.three_staple, links);
       /* The Lepage term */
       /* Note this also involves modifying c_1 (above) */
       compute_gen_staple_field(NULL, dir, nu, staple, 1,
				fat, p->act_path_coeff.lepage, links);
       for(rho=XUP; rho<=TUP; rho++) if((rho!=dir)&&(rho!=nu))
	 {
	   compute_gen_staple_field( tempmat1, dir, rho, staple, 1,
				     fat, p->act_path_coeff.five_staple, links);
	   for(sig=XUP; sig<=TUP; sig++)
	     if((sig!=dir)&&(sig!=nu)&&(sig!=rho))
	       {
		 compute_gen_staple_field(NULL,dir,sig,tempmat1, 1,
				  fat, p->act_path_coeff.seven_staple, links);
	       } /* sig */
	 } /* rho */
     } /* nu */
 }/* dir */  
#endif

 special_free(staple);  staple = NULL;
 special_free(tempmat1); tempmat1 = NULL;

 dtime += dclock();
 info->final_sec += dtime;
 info->final_flop = 61632.*volume/numnodes();
 if( p->act_path_coeff.three_staple == 0.0 &&
     p->act_path_coeff.lepage == 0.0 &&
     p->act_path_coeff.five_staple == 0.0)
   info->final_flop = 72.*volume/numnodes();

}  /* load_fatlinks_cpu */
Example #2
0
static void destroy_mn_special(su3_matrix *m){
  special_free(m);
}
Example #3
0
/* update the momenta with the gauge force */
void imp_gauge_force_cpu( Real eps, field_offset mom_off ){
    register int i,dir;
    register site *st;
    su3_matrix tmat1,tmat2;
    register Real eb3;
    register anti_hermitmat* momentum;
    su3_matrix *staple, *tempmat1;

    /* lengths of various kinds of loops */
    int *loop_length = get_loop_length();
    /* number of rotations/reflections  for each kind */
    int *loop_num = get_loop_num();
    /* table of directions, 1 for each kind of loop */
    int ***loop_table = get_loop_table();
    /* table of coefficients in action, for various "representations"
	(actually, powers of the trace) */
    Real **loop_coeff = get_loop_coeff();
    int max_length = get_max_length();
    int nloop = get_nloop();
    int nreps = get_nreps();

#ifdef GFTIME
    int nflop = 153004;  /* For Symanzik1 action */
    double dtime;
#endif
    int j,k;
    int *dirs,length;
    int *path_dir,path_length;

    int ln,iloop;
    Real action,act2,new_term;

    int ncount;
    char myname[] = "imp_gauge_force";
    su3_matrix *links;

#ifdef GFTIME
    dtime=-dclock();
#endif

    dirs = (int *)malloc(max_length*sizeof(int));
    if(dirs == NULL){
      printf("%s(%d): Can't malloc dirs\n",myname,this_node);
      terminate(1);
    }
    path_dir = (int *)malloc(max_length*sizeof(int));
    if(path_dir == NULL){
      printf("%s(%d): Can't malloc path_dir\n",myname,this_node);
      terminate(1);
    }
    staple = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix));
    if(staple == NULL){
      printf("%s(%d): Can't malloc temporary\n",myname,this_node);
      terminate(1);
    }

    tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix));
    if(tempmat1 == NULL){
      printf("%s(%d): Can't malloc temporary\n",myname,this_node);
      terminate(1);
    }

    eb3 = eps*beta/3.0;
    links = create_G_from_site();

    /* Loop over directions, update mom[dir] */
    for(dir=XUP; dir<=TUP; dir++){

	FORALLSITES_OMP(i,st,private(j,k))for(j=0;j<3;j++)for(k=0;k<3;k++){
			staple[i].e[j][k]=cmplx(0.0,0.0);
	} END_LOOP_OMP

	ncount=0;
	for(iloop=0;iloop<nloop;iloop++){
	    length=loop_length[iloop];
	    for(ln=0;ln<loop_num[iloop];ln++){
/**printf("UPD:  "); printpath( loop_table[iloop][ln], length );**/
		/* set up dirs.  we are looking at loop starting in "XUP"
		   direction, rotate so it starts in "dir" direction. */
		for(k=0;k<length;k++){
                    if( GOES_FORWARDS(loop_table[iloop][ln][k]) ){
                	dirs[k]=(dir+loop_table[iloop][ln][k] )% 4;
		    }
            	    else {
                        dirs[k]=OPP_DIR(
			    (dir+OPP_DIR(loop_table[iloop][ln][k]))%4 );
		    }
		}

		path_length= length-1;  /* generalized "staple" */

		/* check for links in direction of momentum to be
		   updated, each such link gives a contribution. Note
		   the direction of the path - opposite the link. */
		for(k=0;k<length;k++)if( dirs[k]==dir||dirs[k]==OPP_DIR(dir)) {
		    if( GOES_FORWARDS(dirs[k]) ) for(j=0;j<path_length;j++) {
			path_dir[j] = dirs[(k+j+1)%length];
		    }
		    if( GOES_BACKWARDS(dirs[k]) ) for(j=0;j<path_length;j++) {
			path_dir[path_length-1-j] =
			    OPP_DIR(dirs[(k+j+1)%length]);
		    }
/**if(dir==XUP)printf("X_UPDATE PATH: "); printpath( path_dir, path_length );**/
		    path_product_fields(links, path_dir, path_length, tempmat1);

		    /* We took the path in the other direction from our
			old convention in order to get it to end up
			"at our site", so now take adjoint */
		    /* then compute "single_action" contribution to
			staple */
		    FORALLSITES_OMP(i,st, private(tmat1,new_term,act2,action) ){
			su3_adjoint( &(tempmat1[i]), &tmat1 );
			/* first we compute the fundamental term */
			new_term = loop_coeff[iloop][0];

			/* now we add in the higher representations */
			if(nreps > 1){
node0_printf("WARNING: THIS CODE IS NOT TESTED\n"); exit(0);
			    act2=1.0;
			    action = 3.0 - realtrace_su3(&(st->link[dir]),
				&tmat1 ); 

			    for(j=1;j<nreps;j++){
				act2 *= action;
				new_term +=
				    loop_coeff[iloop][j]*act2*(Real)(j+1);
			    }
			}  /* end if nreps > 1 */

			scalar_mult_add_su3_matrix( &(staple[i]), &tmat1,
				new_term, &(staple[i]) );

		    } END_LOOP_OMP

		    ncount++;

		} /* k (location in path) */
	    } /* ln */
	} /* iloop */

	/* Now multiply the staple sum by the link, then update momentum */
	FORALLSITES_OMP(i,st, private(tmat1,tmat2,momentum) ){
	    mult_su3_na( &(st->link[dir]), &(staple[i]), &tmat1 );
	    momentum = (anti_hermitmat *)F_PT(st,mom_off);
	    uncompress_anti_hermitian( &momentum[dir], &tmat2 );
	    scalar_mult_sub_su3_matrix( &tmat2, &tmat1,
		eb3, &(staple[i]) );
	    make_anti_hermitian( &(staple[i]), &momentum[dir] );
	} END_LOOP_OMP
    } /* dir loop */
#ifdef GFTIME
dtime+=dclock();
node0_printf("GFTIME:   time = %e (Symanzik1) mflops = %e\n",dtime,
	     nflop*(double)volume/(1e6*dtime*numnodes()) );
#endif
 free(path_dir);
 free(dirs);
 destroy_G(links);
 special_free(staple); 
 special_free(tempmat1); 
} /* imp_gauge_force.c */