/*
 * d <- a + s[0] * P0 + s[1] * P1
 *
 * where Pk is the 3x3 matrix with elements
 *     Pk[i][j] = b->h[k].c[i] * conj( c->h[k].c[j] )
 * (written out explicitly in the FAST branch; the generic branch obtains
 * the same matrix from su3_projector).
 *
 *   a  input matrix the projectors are added to
 *   b  half Wilson vector supplying the row index of each projector
 *   c  half Wilson vector supplying the (conjugated) column index
 *   s  two scalar weights, s[0] for component h[0] and s[1] for h[1]
 *   d  output matrix; may be the same object as a
 *
 * Both branches start from a, so the result no longer depends on the
 * previous contents of d.  For callers that pass a == d the result is
 * unchanged.
 */
void scalar_mult_add_hwvec_proj( su3_matrix * const a,
                                 half_wilson_vector * const b,
                                 half_wilson_vector * const c,
                                 Real * const s, su3_matrix *d )
{
#ifdef FAST
  int i,j;
  Real tmp0,tmp1;

  for(i=0;i<3;i++)for(j=0;j<3;j++){
    /* first component: start from a */
    tmp1 = b->h[0].c[i].real * c->h[0].c[j].real;
    tmp0 = b->h[0].c[i].imag * c->h[0].c[j].imag;
    d->e[i][j].real = a->e[i][j].real + (tmp0 + tmp1)*s[0];

    tmp1 = b->h[0].c[i].real * c->h[0].c[j].imag;
    tmp0 = b->h[0].c[i].imag * c->h[0].c[j].real;
    /* the imaginary part must start from a as well, not from old d */
    d->e[i][j].imag = a->e[i][j].imag + (tmp0 - tmp1)*s[0];

    /* second component: accumulate onto d */
    tmp1 = b->h[1].c[i].real * c->h[1].c[j].real;
    tmp0 = b->h[1].c[i].imag * c->h[1].c[j].imag;
    d->e[i][j].real += (tmp0 + tmp1)*s[1];

    tmp1 = b->h[1].c[i].real * c->h[1].c[j].imag;
    tmp0 = b->h[1].c[i].imag * c->h[1].c[j].real;
    d->e[i][j].imag += (tmp0 - tmp1)*s[1];
  }
#else
  su3_matrix tmat;

  /* first component: d = a + s[0]*P0 (previously a was ignored here) */
  su3_projector(&(b->h[0]), &(c->h[0]), &tmat);
  scalar_mult_add_su3_matrix(a, &tmat, s[0], d );

  /* second component: d += s[1]*P1 */
  su3_projector(&(b->h[1]), &(c->h[1]), &tmat);
  scalar_mult_add_su3_matrix(d, &tmat, s[1], d );
#endif
}
void block_nhyp3() { register int dir, dir2, i; register site *st; Real f[3]; /* related code is specific to SU(3) */ Real ftmp1,ftmp2; su3_matrix tmat, Omega, eQ, Q, Q2; ftmp1=alpha_smear[0]/(6.*(1.-alpha_smear[0])); ftmp2=1.-alpha_smear[0]; for(dir=XUP;dir<=TUP;dir++){ /* compute the staple */ FORALLDYNLINKS(i,st,dir) clear_su3mat(&Staple3[dir][i]); for(dir2=XUP;dir2<=TUP;dir2++) if(dir2!=dir){ #if (SMEAR_LEVEL>1) staple_nhyp(dir,dir2,hyplink2[dir2][dir], hyplink2[dir][dir2],Staple3[dir]); #else /* one-level only */ staple_nhyp(dir,dir2,gauge_field_thin[dir], gauge_field_thin[dir2],Staple3[dir]); #endif } FORALLDYNLINKS(i,st,dir){ /* make Omega */ scalar_mult_add_su3_matrix(gauge_field_thin[dir]+i, Staple3[dir]+i,ftmp1 ,&Q); scalar_mult_su3_matrix(&Q,ftmp2,&Omega); Staple3[dir][i]=Omega; mult_su3_an(&Omega,&Omega,&Q); /* IR regulator, see clover_xxx/defines.h */ scalar_add_diag_su3(&Q,IR_STAB); #ifndef NHYP_DEBUG compute_fhb(&Q,f,NULL, 0); #else compute_fhb(&Omega,&Q,f,NULL, 0); #endif /* make Q**2 */ mult_su3_nn(&Q,&Q,&Q2); /* compute Q^(-1/2) via Eq. 19 */ scalar_mult_su3_matrix(&Q,f[1],&tmat); scalar_mult_add_su3_matrix(&tmat,&Q2,f[2],&eQ); scalar_add_diag_su3(&eQ,f[0]); /* multiply Omega by eQ = (Omega^\dagger Omega)^(-1/2) */ mult_su3_nn(&Omega,&eQ,gauge_field[dir]+i); } } /* dir */
/*
 * Advance every link on this node by a step of size eps.
 *
 * For each site and direction the momentum s->mom[dir] is expanded into a
 * 3x3 matrix H by uncompress_anti_hermitian, and the link U is replaced by
 *
 *     U + eps*H*( U + (eps/2)*H*( U + ... + (eps/8)*H*U ) ),
 *
 * i.e. the Taylor series of exp(eps*H)*U truncated after the eighth-order
 * term, evaluated innermost factor first.
 *
 * When FN is defined the fermion links are invalidated before the gauge
 * field is touched.
 */
void update_u_cpu( Real eps ){
  enum { TAYLOR_ORDER = 8 };
  Real weight[TAYLOR_ORDER];    /* weight[k-1] = eps/k */
  su3_matrix hmat, prod, sum;
  su3_matrix *link, *operand;
  site *s;
  int i, dir, k;

  /* Do the divisions once, outside the site loop */
  weight[0] = eps;
  for(k = 2; k <= TAYLOR_ORDER; k++){
    weight[k-1] = eps/(double)k;
  }

#ifdef FN
  invalidate_fermion_links(fn_links);
#endif

  FORALLSITES(i,s){
    for(dir=XUP; dir <=TUP; dir++){
      link = &(s->link[dir]);
      uncompress_anti_hermitian( &(s->mom[dir]), &hmat );

      /* The innermost factor multiplies the link itself; every later
         one multiplies the running sum. */
      operand = link;
      for(k = TAYLOR_ORDER; k >= 1; k--){
        mult_su3_nn( &hmat, operand, &prod );
        scalar_mult_add_su3_matrix( link, &prod, weight[k-1], &sum );
        operand = &sum;
      }
      su3mat_copy( &sum, link );
    }
  }
} /* update_u_cpu */
/*
 * Advance every link on this node by a step of size eps.
 *
 * For each site and direction the momentum s->mom[dir] is expanded into a
 * 3x3 matrix H by uncompress_anti_hermitian, and the link U is replaced by
 *
 *     U + eps*H*( U + (eps/2)*H*( U + ... + (eps/6)*H*U ) ),
 *
 * i.e. the Taylor series of exp(eps*H)*U truncated after the sixth-order
 * term, evaluated innermost factor first.
 *
 * When FN is defined, valid_longlinks and valid_fatlinks are cleared after
 * the links have been updated.
 */
void update_u( double eps ){
  enum { TAYLOR_ORDER = 6 };
  double weight[TAYLOR_ORDER];  /* weight[k-1] = eps/k */
  su3_matrix hmat, prod, sum;
  su3_matrix *link, *operand;
  site *s;
  int i, dir, k;

  /* Do the divisions once, outside the site loop */
  weight[0] = eps;
  for(k = 2; k <= TAYLOR_ORDER; k++){
    weight[k-1] = eps/(double)k;
  }

  FORALLSITES(i,s){
    for(dir=XUP; dir <=TUP; dir++){
      link = &(s->link[dir]);
      uncompress_anti_hermitian( &(s->mom[dir]), &hmat );

      /* The innermost factor multiplies the link itself; every later
         one multiplies the running sum. */
      operand = link;
      for(k = TAYLOR_ORDER; k >= 1; k--){
        mult_su3_nn( &hmat, operand, &prod );
        scalar_mult_add_su3_matrix( link, &prod, weight[k-1], &sum );
        operand = &sum;
      }
      su3mat_copy( &sum, link );
    }
  }

#ifdef FN
  /* The gauge field changed, so mark the derived links as stale */
  valid_longlinks=0;
  valid_fatlinks=0;
#endif
} /* update_u */
/*
 * Advance the links on this node by a step of size eps, leaving untouched
 * every link that is not in the TUP direction on sites with s->t <= 0.
 * NOTE(review): those skipped links look like a fixed boundary condition
 * at t = 0 -- confirm against the application that owns this file.
 *
 * For each updated link U the momentum s->mom[dir] is expanded into a 3x3
 * matrix H by uncompress_anti_hermitian, and U is replaced by
 *
 *     U + eps*H*( U + (eps/2)*H*( U + ... + (eps/6)*H*U ) ),
 *
 * i.e. the Taylor series of exp(eps*H)*U truncated after the sixth-order
 * term, evaluated innermost factor first.
 *
 * The fermion links are invalidated before the gauge field is touched.
 */
void update_u(Real eps) {
  enum { TAYLOR_ORDER = 6 };
  Real weight[TAYLOR_ORDER];    /* weight[k-1] = eps/k */
  su3_matrix hmat, prod, sum;
  su3_matrix *link, *operand;
  site *s;
  int i, dir, k;

  /* Do the divisions once, outside the site loop */
  weight[0] = eps;
  for(k = 2; k <= TAYLOR_ORDER; k++){
    weight[k-1] = eps/(double)k;
  }

  invalidate_fermion_links(fn_links);

  FORALLSITES(i,s){
    for(dir=XUP; dir <=TUP; dir++){
      /* Only TUP links, or any link on a site with t > 0, are updated */
      if(dir != TUP && !(s->t > 0)){
        continue;
      }

      link = &(s->link[dir]);
      uncompress_anti_hermitian( &(s->mom[dir]), &hmat );

      /* The innermost factor multiplies the link itself; every later
         one multiplies the running sum. */
      operand = link;
      for(k = TAYLOR_ORDER; k >= 1; k--){
        mult_su3_nn( &hmat, operand, &prod );
        scalar_mult_add_su3_matrix( link, &prod, weight[k-1], &sum );
        operand = &sum;
      }
      su3mat_copy( &sum, link );
    }
  }
} /* update_u */
void monte_block_ape_b(int NumStp1) { int NumTrj,Nhit, index1, ina, inb,ii,cb; int parity; Real b3; register int dir,i; register site *st; void dsdu_ape(register int dir1, int parity); su3_matrix tmat1; Real a0,a1,a2,a3,asq; int index,ind1,ind2,step; su2_matrix h; Real alpha; int NumStp; NumStp=3 ; Nhit=5; alpha=0.3; b3=alpha/(1.0-alpha)/6.0; b3=1.0/b3; if(this_node==0)printf("pure APE blocking with alpha %e N %d\n",alpha,NumStp); /* set bb_link=link */ FORALLSITES(i,st)for(dir=XUP;dir<=TUP;dir++){ st->blocked_link[8+dir]= st->link[dir]; } /* ape blocking steps_rg levels*/ for(step=1;step<=NumStp;step++){ for(parity=ODD;parity<=EVEN;parity++) for(dir=XUP;dir<=TUP;dir++){ /* compute the gauge force */ dsdu_ape(dir,parity); FORSOMEPARITY(i,st,parity){ { /* set blocked_link=bb_link temporarily*/ st->blocked_link[4+dir]= st->blocked_link[8+dir]; /* add the staple to the blocked link. ``staple'' will become the new blocked_link after normalization */ scalar_mult_add_su3_matrix(&(st->tempmat2), &(st->blocked_link[8+dir]),b3,&(st->tempmat2)); /* if(i==0&&step==1){ printf("\n\n step=%d i=%d dir=%d\n",step,i,dir); dumpmat(&(st->blocked_link[8+dir])); dumpmat(&(st->tempmat2)); }*/ /* Now do hits in the SU(2) subgroup to "normalize" staple */ for(index=0;index<3*Nhit;index++){ /* pick out an SU(2) subgroup */ ind1=(index) % 3; ind2=(index+1) % 3; if(ind1 > ind2){ ii=ind1; ind1=ind2; ind2=ii;} mult_su3_na( &(st->blocked_link[4+dir]), &(st->tempmat2), &tmat1 ); /* Extract SU(2) subgroup in Pauli matrix representation, a0 + i * sum_j a_j sigma_j, from the SU(3) matrix tmat1 */ a0 = tmat1.e[ind1][ind1].real + tmat1.e[ind2][ind2].real; a1 = tmat1.e[ind1][ind2].imag + tmat1.e[ind2][ind1].imag; a2 = tmat1.e[ind1][ind2].real - tmat1.e[ind2][ind1].real; a3 = tmat1.e[ind1][ind1].imag - tmat1.e[ind2][ind2].imag; /* Normalize and put complex conjugate into u */ asq = a0*a0 + a1*a1 + a2*a2 + a3*a3; asq = sqrt((double)asq); a0 = a0/asq; a1 = a1/asq; a2 = a2/asq; a3 = a3/asq; h.e[0][0] = 
cmplx( a0,-a3); h.e[0][1] = cmplx(-a2,-a1); h.e[1][0] = cmplx( a2,-a1); h.e[1][1] = cmplx( a0, a3); /* Do the SU(2) hit */ left_su2_hit_n( &h, ind1, ind2, &(st->blocked_link[4+dir])); } /* indices */ } /* end loop over sites */ }} /* direction and parity */
static su3_matrix *create_longlinks_qop_milc(QOP_info_t *info, QOP_asqtad_coeffs_t *coeffs, QOP_GaugeField *gauge) { register int i; register site *s; int ipath,dir; int disp[4]; int num_q_paths = ks_act_paths.num_q_paths; Q_path *q_paths = ks_act_paths.q_paths; register su3_matrix *long1; su3_matrix *staple, *tempmat1; int nflop = 1804; double dtime; su3_matrix *t_ll; char myname[] = "create_longlinks_qop_milc"; dtime=-dclock(); if( phases_in != 1){ node0_printf("BOTCH: %s needs phases in\n",myname); terminate(0); } /* Allocate space for t_longlink if NULL */ t_ll = (su3_matrix *)special_alloc(sites_on_node*4*sizeof(su3_matrix)); if(t_ll==NULL){ printf("NODE %d: no room for t_ll\n",this_node); terminate(1); } staple = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(staple == NULL){ printf("%s: Can't malloc temporary\n",myname); terminate(1); } tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(tempmat1 == NULL){ printf("%s: Can't malloc temporary\n",myname); terminate(1); } for (dir=XUP; dir<=TUP; dir++){ /* loop over longlink directions */ /* set longlink to zero */ FORALLSITES(i,s){ long1 = &(t_ll[4*i+dir]); clear_su3mat( long1 ); } /* loop over paths, checking for ones with total displacement 3*dir */ for( ipath=0; ipath<num_q_paths; ipath++ ){ /* loop over paths */ /* compute total displacement of path */ for(i=XUP;i<=TUP;i++)disp[i]=0; for( i=0; i<q_paths[ipath].length; i++){ if( GOES_FORWARDS(q_paths[ipath].dir[i]) ) disp[ q_paths[ipath].dir[i] ]++; else disp[OPP_DIR(q_paths[ipath].dir[i]) ]--; } for( disp[dir]+=3,i=XUP; i<=TUP; i++)if(disp[i]!=0)break; if( i<=TUP )continue; /* skip if path doesn't go to right place */ /**printf("ipath = %d, found a path: ",ipath); for(j=0;j<q_paths[ipath].length;j++)printf("\t%d", q_paths[ipath].dir[j]); printf("\n");**/ path_product_qop_milc( q_paths[ipath].dir, q_paths[ipath].length, tempmat1, gauge ); FORALLSITES(i,s){ su3_adjoint( &tempmat1[i], &staple[i] ); long1 = 
&(t_ll[4*i+dir]); scalar_mult_add_su3_matrix( long1, &staple[i], -q_paths[ipath].coeff, long1 ); /* minus sign in coeff. because we used backward path*/ } } /* ipath */
/* update the momenta with the gauge force */ void QOP_symanzik_1loop_gauge_force(QOP_info_t *info, QOP_GaugeField *gauge, QOP_Force *force, QOP_gauge_coeffs_t *coeffs, Real eps) { register int i,dir; register site *st; su3_matrix tmat1; register Real eb3; /* Note: eps now includes eps*beta */ register su3_matrix* momentum; su3_matrix *staple, *tempmat1; /* lengths of various kinds of loops */ int *loop_length = get_loop_length(); /* number of rotations/reflections for each kind */ int *loop_num = get_loop_num(); /* table of directions, 1 for each kind of loop */ int ***loop_table = get_loop_table(); /* table of coefficients in action, for various "representations" (actually, powers of the trace) */ Real **loop_coeff = get_loop_coeff(); /* We make our own */ int max_length = get_max_length(); /* For Symanzik 1 loop! */ int nloop = get_nloop(); int nreps = get_nreps(); su3_matrix *forwardlink[4]; su3_matrix *tmpmom[4]; int nflop = 153004; /* For Symanzik1 action */ Real final_flop; double dtime; int j,k; int *dirs,length; int *path_dir,path_length; int ln,iloop; Real action,act2,new_term; int ncount; char myname[] = "imp_gauge_force"; dtime=-dclock(); info->status = QOP_FAIL; /* Parity requirements */ if(gauge->evenodd != QOP_EVENODD || force->evenodd != QOP_EVENODD ) { printf("QOP_asqtad_force: Bad parity gauge %d force %d\n", gauge->evenodd, force->evenodd); return; } /* Map field pointers to local static pointers */ FORALLUPDIR(dir){ forwardlink[dir] = gauge->g + dir*sites_on_node; tmpmom[dir] = force->f + dir*sites_on_node; } /* Check loop coefficients */ if(coeffs->plaquette != loop_coeff[0][0] || coeffs->rectangle != loop_coeff[1][0] || coeffs->parallelogram != loop_coeff[2][0]) { printf("%s(%d): Path coeffs don't match\n",myname,this_node); return; } /* Allocate arrays according to action */ dirs = (int *)malloc(max_length*sizeof(int)); if(dirs == NULL){ printf("%s(%d): Can't malloc dirs\n",myname,this_node); return; } path_dir = (int 
*)malloc(max_length*sizeof(int)); if(path_dir == NULL){ printf("%s(%d): Can't malloc path_dir\n",myname,this_node); return; } staple = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(staple == NULL){ printf("%s(%d): Can't malloc temporary\n",myname,this_node); return; } tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(tempmat1 == NULL){ printf("%s(%d): Can't malloc temporary\n",myname,this_node); return; } eb3 = eps/3.0; /* Loop over directions, update mom[dir] */ for(dir=XUP; dir<=TUP; dir++){ FORALLSITES(i,st)for(j=0;j<3;j++)for(k=0;k<3;k++){ staple[i].e[j][k]=cmplx(0.0,0.0); } END_LOOP ncount=0; for(iloop=0;iloop<nloop;iloop++){ length=loop_length[iloop]; for(ln=0;ln<loop_num[iloop];ln++){ /**printf("UPD: "); printpath( loop_table[iloop][ln], length );**/ /* set up dirs. we are looking at loop starting in "XUP" direction, rotate so it starts in "dir" direction. */ for(k=0;k<length;k++){ if( GOES_FORWARDS(loop_table[iloop][ln][k]) ){ dirs[k]=(dir+loop_table[iloop][ln][k] )% 4; } else { dirs[k]=OPP_DIR( (dir+OPP_DIR(loop_table[iloop][ln][k]))%4 ); } } path_length= length-1; /* generalized "staple" */ /* check for links in direction of momentum to be updated, each such link gives a contribution. Note the direction of the path - opposite the link. 
*/ for(k=0;k<length;k++)if( dirs[k]==dir||dirs[k]==OPP_DIR(dir)) { if( GOES_FORWARDS(dirs[k]) ) for(j=0;j<path_length;j++) { path_dir[j] = dirs[(k+j+1)%length]; } if( GOES_BACKWARDS(dirs[k]) ) for(j=0;j<path_length;j++) { path_dir[path_length-1-j] = OPP_DIR(dirs[(k+j+1)%length]); } /**if(dir==XUP)printf("X_UPDATE PATH: "); printpath( path_dir, path_length );**/ path_product(path_dir,path_length, tempmat1); /* We took the path in the other direction from our old convention in order to get it to end up "at our site", so now take adjoint */ /* then compute "single_action" contribution to staple */ FORALLSITES(i,st){ su3_adjoint( &(tempmat1[i]), &tmat1 ); /* first we compute the fundamental term */ new_term = loop_coeff[iloop][0]; /* now we add in the higher representations */ if(nreps > 1){ node0_printf("WARNING: THIS CODE IS NOT TESTED\n"); exit(0); act2=1.0; action = 3.0 - realtrace_su3(forwardlink[dir]+i, &tmat1 ); for(j=1;j<nreps;j++){ act2 *= action; new_term += loop_coeff[iloop][j]*act2*(Real)(j+1); } } /* end if nreps > 1 */ scalar_mult_add_su3_matrix( &(staple[i]), &tmat1, new_term, &(staple[i]) ); } END_LOOP ncount++; } /* k (location in path) */ } /* ln */ } /* iloop */ /* Now multiply the staple sum by the link, then update momentum */ FORALLSITES(i,st){ mult_su3_na( forwardlink[dir]+i, &(staple[i]), &tmat1 ); momentum = tmpmom[dir] + i; scalar_mult_sub_su3_matrix( momentum, &tmat1, eb3, momentum ); } END_LOOP
/*-------------------------------------------------------------------*/
/*
 * Fill lng (four matrices per site, indexed as lng[4*site + dir]) from the
 * path table in p.
 *
 * For each direction the destination is cleared, then every path whose net
 * displacement is exactly -3 in that direction and zero in the others is
 * evaluated with path_product_fields on the input links.  The adjoint of
 * the product, weighted by minus the path coefficient, is accumulated into
 * lng.  The sign flip goes with the use of the backward path.
 *
 * Requires phases_in == 1; otherwise the routine reports and terminates.
 * On return info->final_sec holds the elapsed time and info->final_flop
 * the flop estimate.
 */
void load_lnglinks(info_t *info, su3_matrix *lng, ks_component_paths *p,
                   su3_matrix *links ) {
  char myname[] = "load_lnglinks";
  int npaths = p->num_q_paths;
  Q_path *paths = p->q_paths;
  Q_path *path;
  su3_matrix *adj = NULL, *prod = NULL;   /* whole-field temporaries */
  su3_matrix *dest;
  int net[4];                             /* net displacement of a path */
  int isite, ip, mu, nu, step, hop, off_target;
  double dtime = -dclock();

  if( phases_in != 1){
    node0_printf("BOTCH: %s needs phases in\n",myname);
    terminate(0);
  }

  adj  = create_m_special();
  prod = create_m_special();

  for(mu = XUP; mu <= TUP; mu++){
    /* start from a zero long link in this direction */
    FORALLFIELDSITES_OMP(isite,private(dest)){
      dest = lng + 4*isite + mu;
      clear_su3mat( dest );
    } END_LOOP_OMP;

    for(ip = 0; ip < npaths; ip++){
      path = paths + ip;

      /* accumulate the net displacement of this path */
      for(nu = XUP; nu <= TUP; nu++){
        net[nu] = 0;
      }
      for(step = 0; step < path->length; step++){
        hop = path->dir[step];
        if( GOES_FORWARDS(hop) ){
          net[hop]++;
        } else {
          net[OPP_DIR(hop)]--;
        }
      }

      /* keep the path only if it ends three steps back along mu */
      net[mu] += 3;
      off_target = 0;
      for(nu = XUP; nu <= TUP; nu++){
        if(net[nu] != 0){
          off_target = 1;
          break;
        }
      }
      if(off_target){
        continue;
      }

      path_product_fields( links, path->dir, path->length, prod );

      FORALLFIELDSITES(isite){
        su3_adjoint( &prod[isite], &adj[isite] );
        dest = lng + 4*isite + mu;
        /* minus sign in the coefficient because the backward path was used */
        scalar_mult_add_su3_matrix( dest, &adj[isite], -path->coeff, dest );
      }
    } /* paths */
  } /* directions */

  destroy_m_special(adj);
  adj = NULL;
  destroy_m_special(prod);
  prod = NULL;

  dtime += dclock();
  info->final_sec = dtime;
  info->final_flop = 1728.*volume/numnodes(); /* (formerly 1804) */
} /* load_lnglinks() */
/*
 * Fill fat (four matrices per site, indexed as fat[4*site + dir]) from the
 * input links and the action description in p.
 *
 * Without ASQ_OPTIMIZED_FATTENING: for each direction the destination is
 * cleared, then every path in p->q_paths whose net displacement is exactly
 * -1 in that direction is evaluated with path_product_fields.  The adjoint
 * of the product, weighted by minus the path coefficient, is accumulated.
 *
 * With ASQ_OPTIMIZED_FATTENING: the fat link starts as the thin link times
 * (one_link - 6*lepage), and the staple terms are added by nested calls to
 * compute_gen_staple_field using the coefficients in p->act_path_coeff.
 * The staple work is skipped when the three_staple, lepage and five_staple
 * coefficients are all zero.
 *
 * On return the elapsed time has been added to info->final_sec and
 * info->final_flop holds the flop estimate.
 */
void load_fatlinks_cpu(info_t *info, su3_matrix *fat, ks_component_paths *p,
                       su3_matrix *links){
  char myname[] = "load_fatlinks_cpu";
  su3_matrix *staple = NULL, *tempmat1 = NULL;  /* whole-field temporaries */
  su3_matrix *fat1;
  int i, dir;
  /* true when no staple term contributes */
  int no_staples = ( p->act_path_coeff.three_staple == 0.0 &&
                     p->act_path_coeff.lepage       == 0.0 &&
                     p->act_path_coeff.five_staple  == 0.0 );
#ifdef ASQ_OPTIMIZED_FATTENING
  int nu, rho, sig;
  Real one_link;
#else
  int num_q_paths = p->num_q_paths;
  Q_path *q_paths = p->q_paths;
  Q_path *path;
  int disp[4];                    /* net displacement of a path */
  int ipath, mu, step, hop, off_target;
#endif
  double dtime = -dclock();

  staple = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix));
  if(staple == NULL){
    printf("%s: Can't malloc temporary\n",myname);
    terminate(1);
  }

  tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix));
  if(tempmat1 == NULL){
    printf("%s: Can't malloc temporary\n",myname);
    terminate(1);
  }

#ifndef ASQ_OPTIMIZED_FATTENING   /* general case code */
  for(dir = XUP; dir <= TUP; dir++){
    /* start from a zero fat link in this direction */
    FORALLFIELDSITES(i){
      fat1 = fat + 4*i + dir;
      clear_su3mat( fat1 );
    }

    for(ipath = 0; ipath < num_q_paths; ipath++){
      path = q_paths + ipath;

      /* accumulate the net displacement of this path */
      for(mu = XUP; mu <= TUP; mu++){
        disp[mu] = 0;
      }
      for(step = 0; step < path->length; step++){
        hop = path->dir[step];
        if( GOES_FORWARDS(hop) ){
          disp[hop]++;
        } else {
          disp[OPP_DIR(hop)]--;
        }
      }

      /* keep the path only if it ends one step back along dir */
      disp[dir] += 1;
      off_target = 0;
      for(mu = XUP; mu <= TUP; mu++){
        if(disp[mu] != 0){
          off_target = 1;
          break;
        }
      }
      if(off_target){
        continue;
      }

      path_product_fields( links, path->dir, path->length, tempmat1 );

      FORALLFIELDSITES(i){
        su3_adjoint( &tempmat1[i], &staple[i] );
        fat1 = fat + 4*i + dir;
        /* minus sign in the coefficient because the backward path was used */
        scalar_mult_add_su3_matrix( fat1, &staple[i], -path->coeff, fat1 );
      }
    } /* ipath */
  } /* loop over directions */
#else   /* ASQ_OPTIMIZED_FATTENING, for Asq and Asqtad actions */
  /* Optimized fattening code for the Asq and Asqtad actions.
   * Assumes path 0 is the one-link path, path 2 the 3-staple, path 3 the
   * 5-staple, path 4 the 7-staple and path 5 the Lepage term.
   * Path 1 is the Naik term.                                             */

  /* to fix up the Lepage term, included by a trick below */
  one_link = (p->act_path_coeff.one_link - 6.0*p->act_path_coeff.lepage);

  for(dir = XUP; dir <= TUP; dir++){
    /* Initialize fat links with c_1*U_\mu(x) */
    FORALLFIELDSITES(i){
      fat1 = fat + 4*i + dir;
      scalar_mult_su3_matrix( links + 4*i + dir, one_link, fat1 );
    }

    /* Skip the rest of the calculation if the remaining coefficients vanish */
    if(no_staples){
      continue;
    }

    for(nu = XUP; nu <= TUP; nu++){
      if(nu == dir){
        continue;
      }

      compute_gen_staple_field(staple, dir, nu, links + dir, 4,
                               fat, p->act_path_coeff.three_staple, links);

      /* The Lepage term */
      /* Note this also involves modifying c_1 (above) */
      compute_gen_staple_field(NULL, dir, nu, staple, 1,
                               fat, p->act_path_coeff.lepage, links);

      for(rho = XUP; rho <= TUP; rho++){
        if(rho == dir || rho == nu){
          continue;
        }

        compute_gen_staple_field(tempmat1, dir, rho, staple, 1,
                                 fat, p->act_path_coeff.five_staple, links);

        for(sig = XUP; sig <= TUP; sig++){
          if(sig == dir || sig == nu || sig == rho){
            continue;
          }

          compute_gen_staple_field(NULL, dir, sig, tempmat1, 1,
                                   fat, p->act_path_coeff.seven_staple, links);
        } /* sig */
      } /* rho */
    } /* nu */
  } /* dir */
#endif

  special_free(staple);
  staple = NULL;
  special_free(tempmat1);
  tempmat1 = NULL;

  dtime += dclock();
  info->final_sec += dtime;
  info->final_flop = 61632.*volume/numnodes();
  if(no_staples){
    info->final_flop = 72.*volume/numnodes();
  }
} /* load_fatlinks_cpu */
/* update the momenta with the gauge force */ void imp_gauge_force_cpu( Real eps, field_offset mom_off ){ register int i,dir; register site *st; su3_matrix tmat1,tmat2; register Real eb3; register anti_hermitmat* momentum; su3_matrix *staple, *tempmat1; /* lengths of various kinds of loops */ int *loop_length = get_loop_length(); /* number of rotations/reflections for each kind */ int *loop_num = get_loop_num(); /* table of directions, 1 for each kind of loop */ int ***loop_table = get_loop_table(); /* table of coefficients in action, for various "representations" (actually, powers of the trace) */ Real **loop_coeff = get_loop_coeff(); int max_length = get_max_length(); int nloop = get_nloop(); int nreps = get_nreps(); #ifdef GFTIME int nflop = 153004; /* For Symanzik1 action */ double dtime; #endif int j,k; int *dirs,length; int *path_dir,path_length; int ln,iloop; Real action,act2,new_term; int ncount; char myname[] = "imp_gauge_force"; #ifdef GFTIME dtime=-dclock(); #endif dirs = (int *)malloc(max_length*sizeof(int)); if(dirs == NULL){ printf("%s(%d): Can't malloc dirs\n",myname,this_node); terminate(1); } path_dir = (int *)malloc(max_length*sizeof(int)); if(path_dir == NULL){ printf("%s(%d): Can't malloc path_dir\n",myname,this_node); terminate(1); } staple = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(staple == NULL){ printf("%s(%d): Can't malloc temporary\n",myname,this_node); terminate(1); } tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(tempmat1 == NULL){ printf("%s(%d): Can't malloc temporary\n",myname,this_node); terminate(1); } eb3 = eps*beta/3.0; /* Loop over directions, update mom[dir] */ for(dir=XUP; dir<=TUP; dir++){ FORALLSITES(i,st)for(j=0;j<3;j++)for(k=0;k<3;k++){ staple[i].e[j][k]=cmplx(0.0,0.0); } END_LOOP ncount=0; for(iloop=0;iloop<nloop;iloop++){ length=loop_length[iloop]; for(ln=0;ln<loop_num[iloop];ln++){ /**printf("UPD: "); printpath( loop_table[iloop][ln], length );**/ /* set up dirs. 
we are looking at loop starting in "XUP" direction, rotate so it starts in "dir" direction. */ for(k=0;k<length;k++){ if( GOES_FORWARDS(loop_table[iloop][ln][k]) ){ dirs[k]=(dir+loop_table[iloop][ln][k] )% 4; } else { dirs[k]=OPP_DIR( (dir+OPP_DIR(loop_table[iloop][ln][k]))%4 ); } } path_length= length-1; /* generalized "staple" */ /* check for links in direction of momentum to be updated, each such link gives a contribution. Note the direction of the path - opposite the link. */ for(k=0;k<length;k++)if( dirs[k]==dir||dirs[k]==OPP_DIR(dir)) { if( GOES_FORWARDS(dirs[k]) ) for(j=0;j<path_length;j++) { path_dir[j] = dirs[(k+j+1)%length]; } if( GOES_BACKWARDS(dirs[k]) ) for(j=0;j<path_length;j++) { path_dir[path_length-1-j] = OPP_DIR(dirs[(k+j+1)%length]); } /**if(dir==XUP)printf("X_UPDATE PATH: "); printpath( path_dir, path_length );**/ path_product(path_dir,path_length, tempmat1); /* We took the path in the other direction from our old convention in order to get it to end up "at our site", so now take adjoint */ /* then compute "single_action" contribution to staple */ FORALLSITES(i,st){ su3_adjoint( &(tempmat1[i]), &tmat1 ); /* first we compute the fundamental term */ new_term = loop_coeff[iloop][0]; /* now we add in the higher representations */ if(nreps > 1){ node0_printf("WARNING: THIS CODE IS NOT TESTED\n"); exit(0); act2=1.0; action = 3.0 - realtrace_su3(&(st->link[dir]), &tmat1 ); for(j=1;j<nreps;j++){ act2 *= action; new_term += loop_coeff[iloop][j]*act2*(Real)(j+1); } } /* end if nreps > 1 */ scalar_mult_add_su3_matrix( &(staple[i]), &tmat1, new_term, &(staple[i]) ); } END_LOOP ncount++; } /* k (location in path) */ } /* ln */ } /* iloop */ /* Now multiply the staple sum by the link, then update momentum */ FORALLSITES(i,st){ mult_su3_na( &(st->link[dir]), &(staple[i]), &tmat1 ); momentum = (anti_hermitmat *)F_PT(st,mom_off); uncompress_anti_hermitian( &momentum[dir], &tmat2 ); scalar_mult_sub_su3_matrix( &tmat2, &tmat1, eb3, &(staple[i]) ); 
make_anti_hermitian( &(staple[i]), &momentum[dir] ); } END_LOOP