/*
 * update_u_cpu -- advance every gauge link on this node by a step eps.
 *
 * For each site and each direction XUP..TUP the momentum mom[dir] is
 * expanded to a full matrix H by uncompress_anti_hermitian, and the
 * link U is overwritten with the eighth-order nested series
 *
 *   U + eps*H*( U + (eps/2)*H*( U + ... + (eps/8)*H*U ) )
 *
 * which is the Taylor polynomial of exp(eps*H)*U.  (This reading
 * assumes scalar_mult_add_su3_matrix(a,b,c,d) stores a + c*b in d --
 * confirm against the su3 library.)
 *
 * eps: step size multiplying the momentum.
 *
 * When FN is defined, invalidate_fermion_links(fn_links) is called
 * before any link is modified.
 */
void update_u_cpu( Real eps ){
  enum { TAYLOR_ORDER = 8 };        /* highest power of H that is kept */
  int isite, mu, n;
  site *sp;
  su3_matrix *u, *inner;
  su3_matrix h, prod, partial;
  Real coef[TAYLOR_ORDER + 1];      /* coef[n] = eps/n; slot 0 is unused */

  /* Do the divisions once, outside the site loop */
  for(n = 1; n <= TAYLOR_ORDER; n++){
    coef[n] = eps/(double)n;
  }

#ifdef FN
  invalidate_fermion_links(fn_links);
#endif

  FORALLSITES(isite,sp){
    for(mu = XUP; mu <= TUP; mu++){
      uncompress_anti_hermitian( &(sp->mom[mu]), &h );
      u = &(sp->link[mu]);

      /* Innermost term multiplies the link itself; every later term
         multiplies the partial sum built so far. */
      inner = u;
      for(n = TAYLOR_ORDER; n >= 1; n--){
        mult_su3_nn( &h, inner, &prod );
        scalar_mult_add_su3_matrix( u, &prod, coef[n], &partial );
        inner = &partial;
      }

      su3mat_copy( &partial, u );
    }
  }
} /* update_u_cpu */
/*
 * update_u -- advance every gauge link on this node by a step eps.
 *
 * For each site and each direction XUP..TUP the momentum mom[dir] is
 * expanded to a full matrix H by uncompress_anti_hermitian, and the
 * link U is overwritten with the sixth-order nested series
 *
 *   U + eps*H*( U + (eps/2)*H*( U + ... + (eps/6)*H*U ) )
 *
 * which is the Taylor polynomial of exp(eps*H)*U.  (This reading
 * assumes scalar_mult_add_su3_matrix(a,b,c,d) stores a + c*b in d --
 * confirm against the su3 library.)
 *
 * eps: step size multiplying the momentum.
 *
 * When FN is defined, valid_longlinks and valid_fatlinks are cleared
 * after all links have been updated.
 */
void update_u( double eps ){
  enum { TAYLOR_ORDER = 6 };        /* highest power of H that is kept */
  int isite, mu, n;
  site *sp;
  su3_matrix *u, *inner;
  su3_matrix h, prod, partial;
  double coef[TAYLOR_ORDER + 1];    /* coef[n] = eps/n; slot 0 is unused */

  /* Divisions are done by hand, once, outside the site loop */
  for(n = 1; n <= TAYLOR_ORDER; n++){
    coef[n] = eps/(double)n;
  }

  FORALLSITES(isite,sp){
    for(mu = XUP; mu <= TUP; mu++){
      uncompress_anti_hermitian( &(sp->mom[mu]), &h );
      u = &(sp->link[mu]);

      /* Innermost term multiplies the link itself; every later term
         multiplies the partial sum built so far. */
      inner = u;
      for(n = TAYLOR_ORDER; n >= 1; n--){
        mult_su3_nn( &h, inner, &prod );
        scalar_mult_add_su3_matrix( u, &prod, coef[n], &partial );
        inner = &partial;
      }

      su3mat_copy( &partial, u );
    }
  }

#ifdef FN
  /* The links changed, so the long and fat links are no longer valid */
  valid_longlinks=0;
  valid_fatlinks=0;
#endif
} /* update_u */
// ----------------------------------------------------------------- // Calculate U = exp(A).U // Goes to eighth order in the exponential: // exp(A) * U = ( 1 + A + A^2/2 + A^3/3 ...) * U // = U + A*(U + (A/2)*(U + (A/3)*( ... ))) void exp_mult(int dir, double eps, anti_hermitmat *A) { register int i; register site *s; matrix *link, temp1, temp2, htemp; register Real t2, t3, t4, t5, t6, t7, t8; // Take divisions out of site loop (can't be done by compiler) t2 = eps / 2.0; t3 = eps / 3.0; t4 = eps / 4.0; t5 = eps / 5.0; t6 = eps / 6.0; t7 = eps / 7.0; t8 = eps / 8.0; FORALLSITES(i, s) { uncompress_anti_hermitian(&(A[i]), &htemp); link = &(s->link[dir]); mult_nn(&htemp, link, &temp1); scalar_mult_add_matrix(link, &temp1, t8, &temp2); mult_nn(&htemp, &temp2, &temp1); scalar_mult_add_matrix(link, &temp1, t7, &temp2); mult_nn(&htemp, &temp2, &temp1); scalar_mult_add_matrix(link, &temp1, t6, &temp2); mult_nn(&htemp, &temp2, &temp1); scalar_mult_add_matrix(link, &temp1, t5, &temp2); mult_nn(&htemp, &temp2, &temp1); scalar_mult_add_matrix(link, &temp1, t4, &temp2); mult_nn(&htemp, &temp2, &temp1); scalar_mult_add_matrix(link, &temp1, t3, &temp2); mult_nn(&htemp, &temp2, &temp1); scalar_mult_add_matrix(link, &temp1, t2, &temp2); mult_nn(&htemp, &temp2, &temp1); scalar_mult_add_matrix(link, &temp1, eps, &temp2); mat_copy(&temp2, link); // This step updates the link U[dir] }
/*
 * update_u -- advance the gauge links on this node by a step eps.
 *
 * A link in direction dir at site s is updated only when dir is TUP or
 * s->t is greater than zero; all other links are left untouched.
 *
 * For each updated link U the momentum mom[dir] is expanded to a full
 * matrix H by uncompress_anti_hermitian, and U is overwritten with the
 * sixth-order nested series
 *
 *   U + eps*H*( U + (eps/2)*H*( U + ... + (eps/6)*H*U ) )
 *
 * which is the Taylor polynomial of exp(eps*H)*U.  (This reading
 * assumes scalar_mult_add_su3_matrix(a,b,c,d) stores a + c*b in d --
 * confirm against the su3 library.)
 *
 * eps: step size multiplying the momentum.
 *
 * invalidate_fermion_links(fn_links) is called before any link is
 * modified.
 */
void update_u(Real eps) {
  enum { TAYLOR_ORDER = 6 };        /* highest power of H that is kept */
  int isite, mu, n;
  site *sp;
  su3_matrix *u, *inner;
  su3_matrix h, prod, partial;
  Real coef[TAYLOR_ORDER + 1];      /* coef[n] = eps/n; slot 0 is unused */

  /* Divisions are done by hand, once, outside the site loop */
  for(n = 1; n <= TAYLOR_ORDER; n++){
    coef[n] = eps/(double)n;
  }

  invalidate_fermion_links(fn_links);

  FORALLSITES(isite,sp){
    for(mu = XUP; mu <= TUP; mu++){
      /* Skip links that are neither time-like nor at t > 0 */
      if( mu != TUP && !(sp->t > 0) ){
        continue;
      }

      uncompress_anti_hermitian( &(sp->mom[mu]), &h );
      u = &(sp->link[mu]);

      /* Innermost term multiplies the link itself; every later term
         multiplies the partial sum built so far. */
      inner = u;
      for(n = TAYLOR_ORDER; n >= 1; n--){
        mult_su3_nn( &h, inner, &prod );
        scalar_mult_add_su3_matrix( u, &prod, coef[n], &partial );
        inner = &partial;
      }

      su3mat_copy( &partial, u );
    }
  }
} /* update_u */
/*
 * gluon_prop -- the part visible here builds the field a_mu[dir] from
 * the gauge links and Fourier transforms it, one direction at a time.
 *
 * NOTE(review): the function body is not closed within this span, and
 * several locals declared below (pmu, sin_pmu, prop_s, msg, px, ...)
 * are not used in the visible part; presumably they belong to code
 * that follows -- confirm against the full file.
 */
void gluon_prop( void ) {
  register int i,dir;
  register int pmu;
  register site *s;
  anti_hermitmat ahtmp;
  Real pix, piy, piz, pit;
  Real sin_pmu, sin_pmu2, prop_s, prop_l, ftmp1, ftmp2;
  complex ctmp;
  su3_matrix mat;
  struct {
    Real f1, f2;
  } msg;
  double trace, dmuAmu;
  int px, py, pz, pt;
  int currentnode,newnode;

  /* PI divided by the lattice extent in each direction */
  pix = PI / (Real)nx;
  piy = PI / (Real)ny;
  piz = PI / (Real)nz;
  pit = PI / (Real)nt;

  /* Running sum of the real part of the link traces, over all
     directions and all sites visited by FORALLSITES */
  trace = 0.0;

  /* Make A_mu as the anti-Hermitian traceless part of U_mu */
  /* But store as SU(3) matrix for call to FFT */
  for(dir=XUP; dir<=TUP; dir++)
  {
    FORALLSITES(i,s){
      trace += (double)(trace_su3( &(s->link[dir]))).real;
      /* Project the link into ahtmp, then expand it into a full
         matrix stored in the site's a_mu[dir] */
      make_anti_hermitian( &(s->link[dir]), &ahtmp);
      uncompress_anti_hermitian( &ahtmp, &(s->a_mu[dir]));
    }

    g_sync();

    /* Now Fourier transform */
    restrict_fourier_site(F_OFFSET(a_mu[dir]), sizeof(su3_matrix), FORWARDS);
  }
/*
 * copy_milc_to_D_F -- expand the compressed matrix at src into a full
 * su3_matrix and pass it to p2d_mat together with dest.
 *
 * dest: destination dsu3_matrix, written via p2d_mat
 * src:  compressed anti_hermitmat to expand
 *
 * NOTE(review): p2d_mat presumably converts to the double-precision
 * matrix type -- confirm against its definition.
 */
static void copy_milc_to_D_F(dsu3_matrix *dest, anti_hermitmat *src){
  su3_matrix expanded;   /* full-matrix form of *src */

  uncompress_anti_hermitian( src, &expanded );
  p2d_mat( dest, &expanded );
}
/* update the momenta with the gauge force */ void imp_gauge_force_cpu( Real eps, field_offset mom_off ){ register int i,dir; register site *st; su3_matrix tmat1,tmat2; register Real eb3; register anti_hermitmat* momentum; su3_matrix *staple, *tempmat1; /* lengths of various kinds of loops */ int *loop_length = get_loop_length(); /* number of rotations/reflections for each kind */ int *loop_num = get_loop_num(); /* table of directions, 1 for each kind of loop */ int ***loop_table = get_loop_table(); /* table of coefficients in action, for various "representations" (actually, powers of the trace) */ Real **loop_coeff = get_loop_coeff(); int max_length = get_max_length(); int nloop = get_nloop(); int nreps = get_nreps(); #ifdef GFTIME int nflop = 153004; /* For Symanzik1 action */ double dtime; #endif int j,k; int *dirs,length; int *path_dir,path_length; int ln,iloop; Real action,act2,new_term; int ncount; char myname[] = "imp_gauge_force"; #ifdef GFTIME dtime=-dclock(); #endif dirs = (int *)malloc(max_length*sizeof(int)); if(dirs == NULL){ printf("%s(%d): Can't malloc dirs\n",myname,this_node); terminate(1); } path_dir = (int *)malloc(max_length*sizeof(int)); if(path_dir == NULL){ printf("%s(%d): Can't malloc path_dir\n",myname,this_node); terminate(1); } staple = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(staple == NULL){ printf("%s(%d): Can't malloc temporary\n",myname,this_node); terminate(1); } tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(tempmat1 == NULL){ printf("%s(%d): Can't malloc temporary\n",myname,this_node); terminate(1); } eb3 = eps*beta/3.0; /* Loop over directions, update mom[dir] */ for(dir=XUP; dir<=TUP; dir++){ FORALLSITES(i,st)for(j=0;j<3;j++)for(k=0;k<3;k++){ staple[i].e[j][k]=cmplx(0.0,0.0); } END_LOOP ncount=0; for(iloop=0;iloop<nloop;iloop++){ length=loop_length[iloop]; for(ln=0;ln<loop_num[iloop];ln++){ /**printf("UPD: "); printpath( loop_table[iloop][ln], length );**/ /* set up dirs. 
we are looking at loop starting in "XUP" direction, rotate so it starts in "dir" direction. */ for(k=0;k<length;k++){ if( GOES_FORWARDS(loop_table[iloop][ln][k]) ){ dirs[k]=(dir+loop_table[iloop][ln][k] )% 4; } else { dirs[k]=OPP_DIR( (dir+OPP_DIR(loop_table[iloop][ln][k]))%4 ); } } path_length= length-1; /* generalized "staple" */ /* check for links in direction of momentum to be updated, each such link gives a contribution. Note the direction of the path - opposite the link. */ for(k=0;k<length;k++)if( dirs[k]==dir||dirs[k]==OPP_DIR(dir)) { if( GOES_FORWARDS(dirs[k]) ) for(j=0;j<path_length;j++) { path_dir[j] = dirs[(k+j+1)%length]; } if( GOES_BACKWARDS(dirs[k]) ) for(j=0;j<path_length;j++) { path_dir[path_length-1-j] = OPP_DIR(dirs[(k+j+1)%length]); } /**if(dir==XUP)printf("X_UPDATE PATH: "); printpath( path_dir, path_length );**/ path_product(path_dir,path_length, tempmat1); /* We took the path in the other direction from our old convention in order to get it to end up "at our site", so now take adjoint */ /* then compute "single_action" contribution to staple */ FORALLSITES(i,st){ su3_adjoint( &(tempmat1[i]), &tmat1 ); /* first we compute the fundamental term */ new_term = loop_coeff[iloop][0]; /* now we add in the higher representations */ if(nreps > 1){ node0_printf("WARNING: THIS CODE IS NOT TESTED\n"); exit(0); act2=1.0; action = 3.0 - realtrace_su3(&(st->link[dir]), &tmat1 ); for(j=1;j<nreps;j++){ act2 *= action; new_term += loop_coeff[iloop][j]*act2*(Real)(j+1); } } /* end if nreps > 1 */ scalar_mult_add_su3_matrix( &(staple[i]), &tmat1, new_term, &(staple[i]) ); } END_LOOP ncount++; } /* k (location in path) */ } /* ln */ } /* iloop */ /* Now multiply the staple sum by the link, then update momentum */ FORALLSITES(i,st){ mult_su3_na( &(st->link[dir]), &(staple[i]), &tmat1 ); momentum = (anti_hermitmat *)F_PT(st,mom_off); uncompress_anti_hermitian( &momentum[dir], &tmat2 ); scalar_mult_sub_su3_matrix( &tmat2, &tmat1, eb3, &(staple[i]) ); 
make_anti_hermitian( &(staple[i]), &momentum[dir] ); } END_LOOP