/*
 * load_fatlinks_cpu
 *
 * Fill the fat-link field `fat` from the gauge field `links`, using the
 * path description in `p`.  Both fields are addressed as 4*site + direction.
 *
 *   info   timing/flop record: final_sec is incremented by the elapsed time,
 *          final_flop is overwritten with a flop estimate
 *   fat    output fat links
 *   p      path table (general build) and action coefficients
 *   links  input gauge links
 *
 * Two builds exist:
 *   - general build: for each direction, every path in p->q_paths whose net
 *     displacement matches is multiplied out and summed with its coefficient;
 *   - ASQ_OPTIMIZED_FATTENING build: the Asq/Asqtad terms are built from
 *     nested calls to compute_gen_staple_field().
 *
 * The routine calls terminate(1) if a scratch field cannot be allocated.
 */
void load_fatlinks_cpu(info_t *info, su3_matrix *fat, ks_component_paths *p, su3_matrix *links)
{
  char myname[] = "load_fatlinks_cpu";
  su3_matrix *staple = NULL;
  su3_matrix *tempmat1 = NULL;
  su3_matrix *fat1;
  int i;
  int dir;
#ifdef ASQ_OPTIMIZED_FATTENING
  int nu, rho, sig;
  Real one_link;
#else
  int ipath;
  int disp[4];
  int num_q_paths = p->num_q_paths;
  Q_path *q_paths = p->q_paths;
  Q_path *path;
#endif
  double dtime = -dclock();

  /* Two scratch fields, one su3_matrix per site on this node. */
  staple = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix));
  if (staple == NULL) {
    printf("%s: Can't malloc temporary\n", myname);
    terminate(1);
  }
  tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix));
  if (tempmat1 == NULL) {
    printf("%s: Can't malloc temporary\n", myname);
    terminate(1);
  }

#ifndef ASQ_OPTIMIZED_FATTENING
  /* ---- general case ---- */
  for (dir = XUP; dir <= TUP; dir++) {

    /* Start this direction's fat link from zero at every site. */
    FORALLFIELDSITES(i) {
      clear_su3mat(&fat[4*i + dir]);
    }

    /* Pick out the paths whose total displacement is one step in dir. */
    for (ipath = 0; ipath < num_q_paths; ipath++) {
      path = &q_paths[ipath];

      /* Net displacement of the path along each of the four axes. */
      for (i = XUP; i <= TUP; i++) {
        disp[i] = 0;
      }
      for (i = 0; i < path->length; i++) {
        if (GOES_FORWARDS(path->dir[i])) {
          disp[path->dir[i]]++;
        } else {
          disp[OPP_DIR(path->dir[i])]--;
        }
      }

      /* The paths are stored running backwards, so a matching path has
         displacement -1 along dir; after adding one, every component
         must be zero. */
      disp[dir] += 1;
      i = XUP;
      while (i <= TUP && disp[i] == 0) {
        i++;
      }
      if (i <= TUP) {
        continue;               /* path does not end in the right place */
      }

      path_product_fields(links, path->dir, path->length, tempmat1);

      FORALLFIELDSITES(i) {
        su3_adjoint(&tempmat1[i], &staple[i]);
        fat1 = &fat[4*i + dir];
        /* Coefficient enters with a minus sign because the backward
           path was used. */
        scalar_mult_add_su3_matrix(fat1, &staple[i], -path->coeff, fat1);
      }
    } /* ipath */
  } /* dir */

#else
  /* ---- ASQ_OPTIMIZED_FATTENING: Asq and Asqtad actions ----
   *
   * Assumed path numbering: 0 = one link, 1 = Naik, 2 = 3-staple,
   * 3 = 5-staple, 4 = 7-staple, 5 = Lepage.
   *
   * The one-link coefficient is shifted by -6*lepage to compensate for the
   * way the Lepage term is folded in below.
   */
  one_link = (p->act_path_coeff.one_link - 6.0*p->act_path_coeff.lepage);

  for (dir = XUP; dir <= TUP; dir++) {

    /* Initialise the fat link with one_link times the thin link. */
    FORALLFIELDSITES(i) {
      fat1 = &fat[4*i + dir];
      scalar_mult_su3_matrix(&links[4*i + dir], one_link, fat1);
    }

    /* The staple terms are only built when at least one of the 3-staple,
       Lepage or 5-staple coefficients is non-zero.
       NOTE(review): seven_staple is not part of this test -- confirm that
       a lone non-zero seven_staple is never expected. */
    if (p->act_path_coeff.three_staple != 0.0 ||
        p->act_path_coeff.lepage != 0.0 ||
        p->act_path_coeff.five_staple != 0.0) {

      for (nu = XUP; nu <= TUP; nu++) {
        if (nu == dir) {
          continue;
        }

        /* 3-staple, built from the thin links (stride 4 through `links`). */
        compute_gen_staple_field(staple, dir, nu, links + dir, 4,
                                 fat, p->act_path_coeff.three_staple, links);

        /* Lepage term: same direction nu applied to the 3-staple.  This is
           what requires the shifted one_link coefficient above. */
        compute_gen_staple_field(NULL, dir, nu, staple, 1,
                                 fat, p->act_path_coeff.lepage, links);

        for (rho = XUP; rho <= TUP; rho++) {
          if (rho == dir || rho == nu) {
            continue;
          }

          /* 5-staple, built on top of the 3-staple. */
          compute_gen_staple_field(tempmat1, dir, rho, staple, 1,
                                   fat, p->act_path_coeff.five_staple, links);

          for (sig = XUP; sig <= TUP; sig++) {
            if (sig == dir || sig == nu || sig == rho) {
              continue;
            }

            /* 7-staple, built on top of the 5-staple. */
            compute_gen_staple_field(NULL, dir, sig, tempmat1, 1,
                                     fat, p->act_path_coeff.seven_staple, links);
          } /* sig */
        } /* rho */
      } /* nu */
    }
  } /* dir */
#endif

  special_free(staple);
  staple = NULL;
  special_free(tempmat1);
  tempmat1 = NULL;

  /* Timing and flop bookkeeping. */
  dtime += dclock();
  info->final_sec += dtime;
  info->final_flop = 61632.*volume/numnodes();
  if (p->act_path_coeff.three_staple == 0.0 &&
      p->act_path_coeff.lepage == 0.0 &&
      p->act_path_coeff.five_staple == 0.0) {
    /* Only the one-link scaling was done. */
    info->final_flop = 72.*volume/numnodes();
  }
} /* load_fatlinks_cpu */
/*
 * destroy_mn_special
 *
 * Release the su3_matrix buffer `m` by passing it to special_free().
 */
static void
destroy_mn_special(su3_matrix *m)
{
  special_free(m);
}
/*
 * imp_gauge_force_cpu -- update the momenta with the gauge force.
 *
 *   eps      step size; the force enters with weight eb3 = eps*beta/3
 *   mom_off  offset, inside a site, of the anti_hermitmat momentum array
 *            (resolved with F_PT and indexed by direction)
 *
 * For each direction dir:
 *   1. a per-site staple sum is cleared;
 *   2. every loop in the loop table is rotated so that it starts in
 *      direction dir, and for every link of the loop that points along
 *      +dir or -dir the product of the remaining length-1 links is formed
 *      with path_product_fields(), adjointed, and accumulated with weight
 *      loop_coeff[iloop][0];
 *   3. the sum is combined with the link by mult_su3_na(), scaled by eb3,
 *      subtracted from the uncompressed momentum, and the result is stored
 *      back through make_anti_hermitian().
 *
 * Only the fundamental-representation coefficient is supported.  If
 * nreps > 1 the routine prints a warning and exits, exactly as before, but
 * it now does so before entering the per-site parallel loop: calling exit()
 * from several OpenMP threads at once is undefined behaviour.  The code that
 * used to follow the exit() was unreachable and has been removed; it added
 *     loop_coeff[iloop][j] * (3 - realtrace_su3(link[dir], staple))^j * (j+1)
 * for j = 1 .. nreps-1 to the weight.
 *
 * The routine calls terminate(1) if any work array cannot be allocated.
 */
void imp_gauge_force_cpu( Real eps, field_offset mom_off ){
    int i,dir;
    site *st;
    su3_matrix tmat1,tmat2;
    Real eb3;
    anti_hermitmat* momentum;
    su3_matrix *staple, *tempmat1;

    /* lengths of various kinds of loops */
    int *loop_length = get_loop_length();
    /* number of rotations/reflections for each kind */
    int *loop_num = get_loop_num();
    /* table of directions, 1 for each kind of loop */
    int ***loop_table = get_loop_table();
    /* table of coefficients in action, for various "representations"
       (actually, powers of the trace) */
    Real **loop_coeff = get_loop_coeff();
    int max_length = get_max_length();
    int nloop = get_nloop();
    int nreps = get_nreps();

#ifdef GFTIME
    int nflop = 153004;  /* For Symanzik1 action */
    double dtime;
#endif
    int j,k;
    int *dirs,length;
    int *path_dir,path_length;

    int ln,iloop;
    Real new_term;

    char myname[] = "imp_gauge_force";
    su3_matrix *links;

#ifdef GFTIME
    dtime=-dclock();
#endif

    /* Work arrays: two direction lists and two per-site matrix fields. */
    dirs = malloc(max_length * sizeof *dirs);
    if(dirs == NULL){
	printf("%s(%d): Can't malloc dirs\n",myname,this_node);
	terminate(1);
    }
    path_dir = malloc(max_length * sizeof *path_dir);
    if(path_dir == NULL){
	printf("%s(%d): Can't malloc path_dir\n",myname,this_node);
	terminate(1);
    }
    staple = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix));
    if(staple == NULL){
	printf("%s(%d): Can't malloc temporary\n",myname,this_node);
	terminate(1);
    }
    tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix));
    if(tempmat1 == NULL){
	printf("%s(%d): Can't malloc temporary\n",myname,this_node);
	terminate(1);
    }

    eb3 = eps*beta/3.0;

    links = create_G_from_site();

    /* Loop over directions, update mom[dir] */
    for(dir=XUP; dir<=TUP; dir++){

	/* Clear the staple accumulator at every site. */
	FORALLSITES_OMP(i,st,private(j,k))for(j=0;j<3;j++)for(k=0;k<3;k++){
	    staple[i].e[j][k]=cmplx(0.0,0.0);
	} END_LOOP_OMP

	for(iloop=0;iloop<nloop;iloop++){
	    length=loop_length[iloop];
	    for(ln=0;ln<loop_num[iloop];ln++){

		/* Set up dirs.  The table holds loops starting in the "XUP"
		   direction; rotate so the loop starts in "dir" direction. */
		for(k=0;k<length;k++){
		    if( GOES_FORWARDS(loop_table[iloop][ln][k]) ){
			dirs[k]=(dir+loop_table[iloop][ln][k] )% 4;
		    }
		    else {
			dirs[k]=OPP_DIR(
			    (dir+OPP_DIR(loop_table[iloop][ln][k]))%4 );
		    }
		}

		path_length= length-1;  /* generalized "staple" */

		/* Check for links in the direction of the momentum to be
		   updated; each such link gives a contribution.  Note the
		   direction of the path - opposite the link. */
		for(k=0;k<length;k++){
		    if( dirs[k]!=dir && dirs[k]!=OPP_DIR(dir) ){
			continue;
		    }

		    if( GOES_FORWARDS(dirs[k]) ){
			for(j=0;j<path_length;j++){
			    path_dir[j] = dirs[(k+j+1)%length];
			}
		    }
		    if( GOES_BACKWARDS(dirs[k]) ){
			for(j=0;j<path_length;j++){
			    path_dir[path_length-1-j] =
				OPP_DIR(dirs[(k+j+1)%length]);
			}
		    }

		    path_product_fields(links, path_dir, path_length,
					tempmat1);

		    /* Higher representations are not supported.  Bail out
		       here, on a single thread, at the point where the
		       original per-site check would first have fired. */
		    if(nreps > 1){
			node0_printf("WARNING: THIS CODE IS NOT TESTED\n");
			exit(0);
		    }

		    /* We took the path in the other direction from our old
		       convention in order to get it to end up "at our
		       site", so now take the adjoint and add the
		       fundamental-term contribution to the staple. */
		    FORALLSITES_OMP(i,st, private(tmat1,new_term) ){
			su3_adjoint( &(tempmat1[i]), &tmat1 );
			new_term = loop_coeff[iloop][0];
			scalar_mult_add_su3_matrix( &(staple[i]), &tmat1,
			    new_term, &(staple[i]) );
		    } END_LOOP_OMP

		} /* k (location in path) */
	    } /* ln */
	} /* iloop */

	/* Now multiply the staple sum by the link, then update momentum */
	FORALLSITES_OMP(i,st, private(tmat1,tmat2,momentum) ){
	    mult_su3_na( &(st->link[dir]), &(staple[i]), &tmat1 );
	    momentum = (anti_hermitmat *)F_PT(st,mom_off);
	    uncompress_anti_hermitian( &momentum[dir], &tmat2 );
	    /* staple[i] is reused here as scratch for the new momentum */
	    scalar_mult_sub_su3_matrix( &tmat2, &tmat1,
		eb3, &(staple[i]) );
	    make_anti_hermitian( &(staple[i]), &momentum[dir] );
	} END_LOOP_OMP
    } /* dir loop */

#ifdef GFTIME
    dtime+=dclock();
    node0_printf("GFTIME: time = %e (Symanzik1) mflops = %e\n",dtime,
	nflop*(double)volume/(1e6*dtime*numnodes()) );
#endif

    free(path_dir);
    free(dirs);
    destroy_G(links);
    special_free(staple);
    special_free(tempmat1);
} /* imp_gauge_force.c */