void d_plaquette(double *ss_plaq,double *st_plaq) { register int i,dir1,dir2; register site *s; register su3_matrix *m1,*m4; double ss_sum,st_sum; msg_tag *mtag0,*mtag1; ss_sum = st_sum = 0.0; for(dir1=YUP;dir1<=TUP;dir1++){ for(dir2=XUP;dir2<dir1;dir2++){ mtag0 = start_gather_site( F_OFFSET(link[dir2]), sizeof(su3_matrix), dir1, EVENANDODD, gen_pt[0] ); mtag1 = start_gather_site( F_OFFSET(link[dir1]), sizeof(su3_matrix), dir2, EVENANDODD, gen_pt[1] ); FORALLSITES(i,s){ m1 = &(s->link[dir1]); m4 = &(s->link[dir2]); mult_su3_an(m4,m1,&(s->tempmat1)); } wait_gather(mtag0); FORALLSITES(i,s){ #ifdef SCHROED_FUN if(dir1==TUP ){ if(s->t==(nt-1)){ mult_su3_nn( &(s->tempmat1), &(s->boundary[dir2]), &(s->staple)); } else{ mult_su3_nn( &(s->tempmat1), (su3_matrix *)(gen_pt[0][i]), &(s->staple)); } } else if(s->t > 0){ mult_su3_nn( &(s->tempmat1), (su3_matrix *)(gen_pt[0][i]), &(s->staple)); } #else mult_su3_nn( &(s->tempmat1),(su3_matrix *)(gen_pt[0][i]), &(s->staple) ); #endif } wait_gather(mtag1); FORALLSITES(i,s){ if(dir1==TUP )st_sum += (double) realtrace_su3((su3_matrix *)(gen_pt[1][i]),&(s->staple) ); #ifdef SCHROED_FUN else if(s->t > 0) ss_sum += (double) #else else ss_sum += (double) #endif realtrace_su3((su3_matrix *)(gen_pt[1][i]),&(s->staple) ); } cleanup_gather(mtag0); cleanup_gather(mtag1); } }
FORALLSITES(i,s){ mult_su3_na( (su3_matrix *)(gen_pt[0][i]), (su3_matrix *)(gen_pt[1][i]), ((su3_matrix *)F_PT(s,f_mn)) ); mult_su3_nn( &(s->tempmat1), ((su3_matrix *)F_PT(s,f_mn)), &(s->tempmat2) ); mult_su3_nn( ((su3_matrix *)F_PT(s,f_mn)), &(s->tempmat1), &(s->staple) ); }
/* update the momenta with the gauge force */ void gauge_force(Real eps) { register int i,dir1,dir2; register site *st; msg_tag *tag0,*tag1,*tag2; int start; su3_matrix tmat1,tmat2; register Real eb3; /**double dtime,dclock(); dtime = -dclock();**/ eb3 = eps*beta/3.0; /* Loop over directions, update mom[dir1] */ for(dir1=XUP; dir1<=TUP; dir1++){ /* Loop over other directions, computing force from plaquettes in the dir1,dir2 plane */ start=1; /* indicates staple sum not initialized */ for(dir2=XUP;dir2<=TUP;dir2++)if(dir2 != dir1){ /* get link[dir2] from direction dir1 */ tag0 = start_gather_site( F_OFFSET(link[dir2]), sizeof(su3_matrix), dir1, EVENANDODD, gen_pt[0] ); /* Start gather for the "upper staple" */ tag2 = start_gather_site( F_OFFSET(link[dir1]), sizeof(su3_matrix), dir2, EVENANDODD, gen_pt[2] ); /* begin the computation "at the dir2DOWN point", we will later gather the intermediate result "to the home point" */ wait_gather(tag0); FORALLSITES(i,st){ mult_su3_an( &(st->link[dir2]), &(st->link[dir1]), &tmat1 ); mult_su3_nn( &tmat1, (su3_matrix *)gen_pt[0][i], &(st->tempmat1) ); } /* Gather this partial result "up to home site" */ tag1 = start_gather_site( F_OFFSET(tempmat1), sizeof(su3_matrix), OPP_DIR(dir2), EVENANDODD, gen_pt[1] ); /* begin the computation of the "upper" staple. Note that one of the links has already been gathered, since it was used in computing the "lower" staple of the site above us (in dir2) */ wait_gather(tag2); if(start){ /* this is the first contribution to staple */ FORALLSITES(i,st){ mult_su3_nn( &(st->link[dir2]), (su3_matrix *)gen_pt[2][i], &tmat1); mult_su3_na( &tmat1, (su3_matrix *)gen_pt[0][i], &(st->staple) ); } start=0; }
void block_nhyp3() { register int dir, dir2, i; register site *st; Real f[3]; /* related code is specific to SU(3) */ Real ftmp1,ftmp2; su3_matrix tmat, Omega, eQ, Q, Q2; ftmp1=alpha_smear[0]/(6.*(1.-alpha_smear[0])); ftmp2=1.-alpha_smear[0]; for(dir=XUP;dir<=TUP;dir++){ /* compute the staple */ FORALLDYNLINKS(i,st,dir) clear_su3mat(&Staple3[dir][i]); for(dir2=XUP;dir2<=TUP;dir2++) if(dir2!=dir){ #if (SMEAR_LEVEL>1) staple_nhyp(dir,dir2,hyplink2[dir2][dir], hyplink2[dir][dir2],Staple3[dir]); #else /* one-level only */ staple_nhyp(dir,dir2,gauge_field_thin[dir], gauge_field_thin[dir2],Staple3[dir]); #endif } FORALLDYNLINKS(i,st,dir){ /* make Omega */ scalar_mult_add_su3_matrix(gauge_field_thin[dir]+i, Staple3[dir]+i,ftmp1 ,&Q); scalar_mult_su3_matrix(&Q,ftmp2,&Omega); Staple3[dir][i]=Omega; mult_su3_an(&Omega,&Omega,&Q); /* IR regulator, see clover_xxx/defines.h */ scalar_add_diag_su3(&Q,IR_STAB); #ifndef NHYP_DEBUG compute_fhb(&Q,f,NULL, 0); #else compute_fhb(&Omega,&Q,f,NULL, 0); #endif /* make Q**2 */ mult_su3_nn(&Q,&Q,&Q2); /* compute Q^(-1/2) via Eq. 19 */ scalar_mult_su3_matrix(&Q,f[1],&tmat); scalar_mult_add_su3_matrix(&tmat,&Q2,f[2],&eQ); scalar_add_diag_su3(&eQ,f[0]); /* multiply Omega by eQ = (Omega^\dagger Omega)^(-1/2) */ mult_su3_nn(&Omega,&eQ,gauge_field[dir]+i); } } /* dir */
void update_u_cpu( Real eps ){ register int i,dir; register site *s; su3_matrix *link,temp1,temp2,htemp; register Real t2,t3,t4,t5,t6,t7,t8; /**TEMP** Real gf_x,gf_av,gf_max; int gf_i,gf_j; **END TEMP **/ /**double dtime,dtime2,dclock();**/ /**dtime = -dclock();**/ /* Take divisions out of site loop (can't be done by compiler) */ t2 = eps/2.0; t3 = eps/3.0; t4 = eps/4.0; t5 = eps/5.0; t6 = eps/6.0; t7 = eps/7.0; t8 = eps/8.0; /** TEMP ** gf_av=gf_max=0.0; **END TEMP**/ #ifdef FN invalidate_fermion_links(fn_links); // free_fn_links(&fn_links); // free_fn_links(&fn_links_dmdu0); #endif FORALLSITES(i,s){ for(dir=XUP; dir <=TUP; dir++){ uncompress_anti_hermitian( &(s->mom[dir]) , &htemp ); link = &(s->link[dir]); mult_su3_nn(&htemp,link,&temp1); scalar_mult_add_su3_matrix(link,&temp1,t8,&temp2); mult_su3_nn(&htemp,&temp2,&temp1); scalar_mult_add_su3_matrix(link,&temp1,t7,&temp2); mult_su3_nn(&htemp,&temp2,&temp1); scalar_mult_add_su3_matrix(link,&temp1,t6,&temp2); mult_su3_nn(&htemp,&temp2,&temp1); scalar_mult_add_su3_matrix(link,&temp1,t5,&temp2); mult_su3_nn(&htemp,&temp2,&temp1); scalar_mult_add_su3_matrix(link,&temp1,t4,&temp2); mult_su3_nn(&htemp,&temp2,&temp1); scalar_mult_add_su3_matrix(link,&temp1,t3,&temp2); mult_su3_nn(&htemp,&temp2,&temp1); scalar_mult_add_su3_matrix(link,&temp1,t2,&temp2); mult_su3_nn(&htemp,&temp2,&temp1); scalar_mult_add_su3_matrix(link,&temp1,eps ,&temp2); su3mat_copy(&temp2,link); } } /**dtime += dclock(); node0_printf("LINK_UPDATE: time = %e mflops = %e\n", dtime, (double)(5616.0*volume/(1.0e6*dtime*numnodes())) );**/ } /* update_u */
static void exp_iQ( su3_matrix *T, su3_matrix *Q ) { complex f[3]; su3_matrix QQ; complex b1[3], b2[3]; int do_bs = 0; mult_su3_nn( Q, Q, &QQ ); get_fs_and_bs_from_Qs( f, b1, b2, Q, &QQ, do_bs); #if 0 { printf("f = (%.10f, %.10f) (%.10f, %.10f) (%.10f, %.10f)\n", f[0].real,f[0].imag,f[1].real,f[1].imag,f[2].real,f[2].imag); get_fs_and_bs_from_Qs ( f, b1, b2, Q, &QQ, 1); printf("f = (%.10f, %.10f) (%.10f, %.10f) (%.10f, %.10f)\n", f[0].real,f[0].imag,f[1].real,f[1].imag,f[2].real,f[2].imag); printf("b1 = (%.10f, %.10f) (%.10f, %.10f) (%.10f, %.10f)\n", b1[0].real,b1[0].imag,b1[1].real,b1[1].imag,b1[2].real,b1[2].imag); printf("b2 = (%.10f, %.10f) (%.10f, %.10f) (%.10f, %.10f)\n", b2[0].real,b2[0].imag,b2[1].real,b2[1].imag,b2[2].real,b2[2].imag); } #endif quadr_comb( T, Q, &QQ, f); }
// ----------------------------------------------------------------- void directional_staple(int dir1, int dir2, field_offset lnk1, field_offset lnk2, su3_matrix *stp) { register int i; register site *s; msg_tag *tag0, *tag1, *tag2; su3_matrix tmat1, tmat2; // Get blocked_link[dir2] from direction dir1 tag0 = start_gather_site(lnk2, sizeof(su3_matrix), dir1, EVENANDODD, gen_pt[0]); // Get blocked_link[dir1] from direction dir2 tag1 = start_gather_site(lnk1, sizeof(su3_matrix), dir2, EVENANDODD, gen_pt[1]); // Start working on the lower staple while we wait for the gathers // The lower staple is prepared at x-dir2 and stored in tempmat1, // then gathered to x FORALLSITES(i, s) mult_su3_an((su3_matrix*)F_PT(s,lnk2), (su3_matrix*)F_PT(s,lnk1), tempmat1 + i); wait_gather(tag0); wait_gather(tag1); // Finish lower staple FORALLSITES(i, s) { mult_su3_nn(tempmat1 + i, (su3_matrix *)gen_pt[0][i], &tmat1); su3mat_copy(&tmat1, tempmat1 + i); }
FORALLSITES(i,s){ mult_su3_nn( &LINK(dir1), (su3_matrix *)(gen_pt[1][i]), &tmat1 ); su3_adjoint( &tmat1, &tmat2 ); add_su3_matrix( &FIELD_STRENGTH(component), &tmat1, &FIELD_STRENGTH(component) ); sub_su3_matrix( &FIELD_STRENGTH(component), &tmat2, &FIELD_STRENGTH(component) ); }
/* do measurements: load density, ploop, etc. and phases onto lattice */ void measure() { register int i,j,k, c, is_even; register site *s; int dx,dy,dz; /* separation for correlated observables */ int dir; /* direction of separation */ msg_tag *tag; register complex cc,dd; /*scratch*/ complex ztr, zcof, znum, zdet, TC, zd, density, zphase; complex p[4]; /* probabilities of n quarks at a site */ complex np[4]; /* probabilities at neighbor site */ complex pp[4][4]; /* joint probabilities of n here and m there */ complex zplp, plp_even, plp_odd; Real locphase, phase; /* First make T (= timelike P-loop) from s->ploop_t T stored in s->tempmat1 */ ploop_less_slice(nt-1,EVEN); ploop_less_slice(nt-1,ODD); phase = 0.; density = plp_even = plp_odd = cmplx(0.0, 0.0); for(j=0;j<4;j++){ p[j]=cmplx(0.0,0.0); for(k=0;k<4;k++)pp[j][k]=cmplx(0.0,0.0); } FORALLSITES(i,s) { if(s->t != nt-1) continue; if( ((s->x+s->y+s->z)&0x1)==0 ) is_even=1; else is_even=0; mult_su3_nn(&(s->link[TUP]), &(s->ploop_t), &(s->tempmat1)); zplp = trace_su3(&(s->tempmat1)); if( is_even){CSUM(plp_even, zplp)} else {CSUM(plp_odd, zplp)} ztr = trace_su3(&(s->tempmat1)); CONJG(ztr, zcof); if(is_even){ for(c=0; c<3; ++c) s->tempmat1.e[c][c].real += C; zdet = det_su3(&(s->tempmat1)); znum = numer(C, ztr, zcof); CDIV(znum, zdet, zd); CSUM(density, zd); /* store n_quark probabilities at this site in lattice variable qprob[], accumulate sum over lattice in p[] */ cc= cmplx(C*C*C,0.0); CDIV(cc,zdet,s->qprob[0]); CSUM(p[0],s->qprob[0]); CMULREAL(ztr,C*C,cc); CDIV(cc,zdet,s->qprob[1]); CSUM(p[1],s->qprob[1]); CMULREAL(zcof,C,cc); CDIV(cc,zdet,s->qprob[2]); CSUM(p[2],s->qprob[2]); cc = cmplx(1.0,0.0); CDIV(cc,zdet,s->qprob[3]); CSUM(p[3],s->qprob[3]); locphase = carg(&zdet); phase += locphase; } }
void stout_smear(su3_matrix *W, su3_matrix *V, su3_matrix *U) { su3_matrix Q, T; get_Q_from_VUadj( &Q, V, U); /* T = exp(iQ) */ exp_iQ( &T, &Q ); /* W = exp(iQ) U */ mult_su3_nn( &T, U, W); }
void update_u( double eps ){ register int i,dir; register site *s; su3_matrix *link,temp1,temp2,htemp; register double t2,t3,t4,t5,t6; /**TEMP** double gf_x,gf_av,gf_max; int gf_i,gf_j; **END TEMP **/ /**double dtime,dtime2,dclock();**/ /**dtime = -dclock();**/ /* Temporary by-hand optimization until pgcc compiler bug is fixed */ t2 = eps/2.0; t3 = eps/3.0; t4 = eps/4.0; t5 = eps/5.0; t6 = eps/6.0; /** TEMP ** gf_av=gf_max=0.0; **END TEMP**/ FORALLSITES(i,s){ for(dir=XUP; dir <=TUP; dir++){ uncompress_anti_hermitian( &(s->mom[dir]) , &htemp ); link = &(s->link[dir]); mult_su3_nn(&htemp,link,&temp1); /**scalar_mult_add_su3_matrix(link,&temp1,eps/6.0,&temp2);**/ scalar_mult_add_su3_matrix(link,&temp1,t6,&temp2); mult_su3_nn(&htemp,&temp2,&temp1); /**scalar_mult_add_su3_matrix(link,&temp1,eps/5.0,&temp2);**/ scalar_mult_add_su3_matrix(link,&temp1,t5,&temp2); mult_su3_nn(&htemp,&temp2,&temp1); /**scalar_mult_add_su3_matrix(link,&temp1,eps/4.0,&temp2);**/ scalar_mult_add_su3_matrix(link,&temp1,t4,&temp2); mult_su3_nn(&htemp,&temp2,&temp1); /**scalar_mult_add_su3_matrix(link,&temp1,eps/3.0,&temp2);**/ scalar_mult_add_su3_matrix(link,&temp1,t3,&temp2); mult_su3_nn(&htemp,&temp2,&temp1); /**scalar_mult_add_su3_matrix(link,&temp1,eps/2.0,&temp2);**/ scalar_mult_add_su3_matrix(link,&temp1,t2,&temp2); mult_su3_nn(&htemp,&temp2,&temp1); scalar_mult_add_su3_matrix(link,&temp1,eps ,&temp2); su3mat_copy(&temp2,link); } } #ifdef FN valid_longlinks=0; valid_fatlinks=0; #endif /**dtime += dclock(); node0_printf("LINK_UPDATE: time = %e mflops = %e\n", dtime, (double)(5616.0*volume/(1.0e6*dtime*numnodes())) );**/ } /* update_u */
void static_prop() { register int i; register site *st; msg_tag *tag; int tloop ; int nthalf = nt/2 ; /*************---------**********-------------************/ /* Initialise the gauge part of the propagator ***/ setup_static_prop() ; /* * Calculate the static propagator for positive time * * W(t+1) = W(t) U_4(t) */ for(tloop=1 ; tloop <= nthalf ; ++tloop) { /* The smear_w_line[0] object is used as work space ***/ FORALLSITES(i,st) { mult_su3_nn(&(st->w_line), &(st->link[TUP]), &(st->smear_w_line[0])); } /* Pull the w(t)*u(t) from the previous time slice ***/ tag=start_gather_site( F_OFFSET(smear_w_line[0]), sizeof(su3_matrix), TDOWN, EVENANDODD, gen_pt[0] ); wait_gather(tag); FORALLSITES(i,st) { if( st-> t == tloop ) su3mat_copy((su3_matrix *) gen_pt[0][i], &(st->w_line)); } cleanup_gather(tag); } /* end the loop over time slice ***/
void exp_iQ( su3_matrix *T, su3_matrix *Q ) { complex f[3]; su3_matrix QQ; mult_su3_nn( Q, Q, &QQ ); get_fs_from_Qs( f, Q, &QQ ); /* f[0] + f[1]*Q + f[2]*QQ */ clear_su3mat( T ); T->e[0][0] = f[0]; T->e[1][1] = f[0]; T->e[2][2] = f[0]; c_scalar_mult_add_su3mat( T, Q, &f[1], T ); c_scalar_mult_add_su3mat( T, &QQ, &f[2], T ); }
void gauge_trans(field_offset G) { register int i,mu; site *s; su3_matrix tmp; msg_tag *tag[4]; FORALLUPDIR(mu) tag[mu] = start_gather_site(G,sizeof(su3_matrix),mu,EVENANDODD, gen_pt[mu]); FORALLUPDIR(mu) { wait_gather(tag[mu]); FORALLSITES(i,s) { mult_su3_an((su3_matrix *)F_PT(s,G), &(s->link[mu]), &tmp); mult_su3_nn(&tmp, (su3_matrix *)gen_pt[mu][i], &(s->link[mu])); } cleanup_gather(tag[mu]); }
void update_u(Real eps) { register int i,dir; register site *s; su3_matrix *link,temp1,temp2,htemp; register Real t2,t3,t4,t5,t6; /* Temporary by-hand optimization until pgcc compiler bug is fixed */ t2 = eps/2.0; t3 = eps/3.0; t4 = eps/4.0; t5 = eps/5.0; t6 = eps/6.0; invalidate_fermion_links(fn_links); FORALLSITES(i,s){ for(dir=XUP; dir <=TUP; dir++) if(dir==TUP || s->t>0){ uncompress_anti_hermitian( &(s->mom[dir]) , &htemp ); link = &(s->link[dir]); mult_su3_nn(&htemp,link,&temp1); /**scalar_mult_add_su3_matrix(link,&temp1,eps/6.0,&temp2);**/ scalar_mult_add_su3_matrix(link,&temp1,t6,&temp2); mult_su3_nn(&htemp,&temp2,&temp1); /**scalar_mult_add_su3_matrix(link,&temp1,eps/5.0,&temp2);**/ scalar_mult_add_su3_matrix(link,&temp1,t5,&temp2); mult_su3_nn(&htemp,&temp2,&temp1); /**scalar_mult_add_su3_matrix(link,&temp1,eps/4.0,&temp2);**/ scalar_mult_add_su3_matrix(link,&temp1,t4,&temp2); mult_su3_nn(&htemp,&temp2,&temp1); /**scalar_mult_add_su3_matrix(link,&temp1,eps/3.0,&temp2);**/ scalar_mult_add_su3_matrix(link,&temp1,t3,&temp2); mult_su3_nn(&htemp,&temp2,&temp1); /**scalar_mult_add_su3_matrix(link,&temp1,eps/2.0,&temp2);**/ scalar_mult_add_su3_matrix(link,&temp1,t2,&temp2); mult_su3_nn(&htemp,&temp2,&temp1); scalar_mult_add_su3_matrix(link,&temp1,eps ,&temp2); su3mat_copy(&temp2,link); } } } /* update_u */
FORALLSITES(i,s){ mult_su3_an( (su3_matrix *)(gen_pt[1][i]), &(s->tempmat1), &tmat4 ); mult_su3_nn( &tmat4, (su3_matrix *)(gen_pt[0][i]), &(s->tempmat2) ); }
// Calculate upper staple, add it FORALLSITES(i, s) { mult_su3_nn((su3_matrix*)F_PT(s,lnk2), (su3_matrix *)gen_pt[1][i], &tmat1); mult_su3_na(&tmat1, (su3_matrix *)gen_pt[0][i], &tmat2); add_su3_matrix(stp + i, &tmat2, stp + i); }
void d_plaquette_minmax(double *ss_plaq,double *st_plaq, double *ss_plaq_min, double *st_plaq_min, double *ss_plaq_max, double *st_plaq_max) { /* su3mat is scratch space of size su3_matrix */ su3_matrix *su3mat; register int i,dir1,dir2; register site *s; register int first_pass_s,first_pass_t; register su3_matrix *m1,*m4; su3_matrix mtmp; double ss_sum,st_sum; double rtrace_s, rtrace_t, ss_min, st_min, ss_max, st_max; msg_tag *mtag0,*mtag1; ss_sum = st_sum = 0.0; first_pass_s=1; first_pass_t=1; su3mat = (su3_matrix *)malloc(sizeof(su3_matrix)*sites_on_node); if(su3mat == NULL) { printf("plaquette: can't malloc su3mat\n"); fflush(stdout); terminate(1); } for(dir1=YUP;dir1<=TUP;dir1++){ for(dir2=XUP;dir2<dir1;dir2++){ mtag0 = start_gather_site( F_OFFSET(link[dir2]), sizeof(su3_matrix), dir1, EVENANDODD, gen_pt[0] ); mtag1 = start_gather_site( F_OFFSET(link[dir1]), sizeof(su3_matrix), dir2, EVENANDODD, gen_pt[1] ); FORALLSITES(i,s){ m1 = &(s->link[dir1]); m4 = &(s->link[dir2]); mult_su3_an(m4,m1,&su3mat[i]); } wait_gather(mtag0); wait_gather(mtag1); FORALLSITES(i,s){ #ifdef SCHROED_FUN if(dir1==TUP ){ if(s->t==(nt-1)){ mult_su3_nn( &su3mat[i], &(s->boundary[dir2]), &mtmp); } else{ mult_su3_nn( &su3mat[i], (su3_matrix *)(gen_pt[0][i]), &mtmp); } rtrace_t = realtrace_su3((su3_matrix *)(gen_pt[1][i]), &mtmp); st_sum += rtrace_t; } else if(s->t > 0){ mult_su3_nn( &su3mat[i], (su3_matrix *)(gen_pt[0][i]), &mtmp); rtrace_s = realtrace_su3((su3_matrix *)(gen_pt[1][i]), &mtmp); ss_sum += rtrace_s; } #else mult_su3_nn( &su3mat[i], (su3_matrix *)(gen_pt[0][i]), &mtmp); if(dir1==TUP ) { rtrace_t = (double) realtrace_su3((su3_matrix *)(gen_pt[1][i]),&mtmp); st_sum += rtrace_t; } else { rtrace_s = (double) realtrace_su3((su3_matrix *)(gen_pt[1][i]),&mtmp); ss_sum += rtrace_s; } #endif // printf("Plaq i=%d, dir1=%d, dir2=%d: %f %f\n", // i,dir1,dir2,rtrace_s,rtrace_t); /* set min and max values on the first pass */ if( dir1==TUP ) { if( 1==first_pass_t ) { st_min = rtrace_t; st_max = rtrace_t; first_pass_t = 0; } else { if( rtrace_t < st_min ) st_min = rtrace_t; if( rtrace_t > st_max ) st_max = rtrace_t; } } else { if( 1==first_pass_s ) { ss_min = rtrace_s; ss_max = rtrace_s; first_pass_s = 0; } else { if( rtrace_s < ss_min ) ss_min = rtrace_s; if( rtrace_s > ss_max ) ss_max = rtrace_s; } } } cleanup_gather(mtag0); cleanup_gather(mtag1); } }
void d_plaquette(double *ss_plaq,double *st_plaq) { /* su3mat is scratch space of size su3_matrix */ su3_matrix *su3mat; register int i,dir1,dir2; register site *s; register su3_matrix *m1,*m4; su3_matrix mtmp; double ss_sum,st_sum; msg_tag *mtag0,*mtag1; ss_sum = st_sum = 0.0; su3mat = (su3_matrix *)malloc(sizeof(su3_matrix)*sites_on_node); if(su3mat == NULL) { printf("plaquette: can't malloc su3mat\n"); fflush(stdout); terminate(1); } for(dir1=YUP;dir1<=TUP;dir1++){ for(dir2=XUP;dir2<dir1;dir2++){ mtag0 = start_gather_site( F_OFFSET(link[dir2]), sizeof(su3_matrix), dir1, EVENANDODD, gen_pt[0] ); mtag1 = start_gather_site( F_OFFSET(link[dir1]), sizeof(su3_matrix), dir2, EVENANDODD, gen_pt[1] ); FORALLSITES(i,s){ m1 = &(s->link[dir1]); m4 = &(s->link[dir2]); mult_su3_an(m4,m1,&su3mat[i]); } wait_gather(mtag0); wait_gather(mtag1); FORALLSITES(i,s){ #ifdef SCHROED_FUN if(dir1==TUP ){ if(s->t==(nt-1)){ mult_su3_nn( &su3mat[i], &(s->boundary[dir2]), &mtmp); } else{ mult_su3_nn( &su3mat[i], (su3_matrix *)(gen_pt[0][i]), &mtmp); } st_sum += realtrace_su3((su3_matrix *)(gen_pt[1][i]), &mtmp); } else if(s->t > 0){ mult_su3_nn( &su3mat[i], (su3_matrix *)(gen_pt[0][i]), &mtmp); ss_sum += realtrace_su3((su3_matrix *)(gen_pt[1][i]), &mtmp); } #else mult_su3_nn( &su3mat[i], (su3_matrix *)(gen_pt[0][i]), &mtmp); if(dir1==TUP )st_sum += (double) realtrace_su3((su3_matrix *)(gen_pt[1][i]),&mtmp); else ss_sum += (double) realtrace_su3((su3_matrix *)(gen_pt[1][i]),&mtmp); #endif } cleanup_gather(mtag0); cleanup_gather(mtag1); } }
/* do measurements: load density, ploop, etc. and phases onto lattice */ void measure() { register int i,j,k, c; register site *s; int dx,dy,dz; /* separation for correlated observables */ int dir; /* direction of separation */ msg_tag *tag; register complex cc,dd; /*scratch*/ complex ztr, zcof, znum, zdet, TC, zd, density, zphase; complex p[4]; /* probabilities of n quarks at a site */ complex np[4]; /* probabilities at neighbor site */ complex pp[4][4]; /* joint probabilities of n here and m there */ complex zplp, plp; Real locphase, phase; /* First make T (= timelike P-loop) from s->ploop_t T stored in s->tempmat1 */ ploop_less_slice(nt-1,EVEN); ploop_less_slice(nt-1,ODD); phase = 0.; density = plp = cmplx(0.0, 0.0); for(j=0;j<4;j++){ p[j]=cmplx(0.0,0.0); for(k=0;k<4;k++)pp[j][k]=cmplx(0.0,0.0); } FORALLSITES(i,s) { if(s->t != nt-1) continue; mult_su3_nn(&(s->link[TUP]), &(s->ploop_t), &(s->tempmat1)); zplp = trace_su3(&(s->tempmat1)); CSUM(plp, zplp); ztr = trace_su3(&(s->tempmat1)); CONJG(ztr, zcof); for(c=0; c<3; ++c) s->tempmat1.e[c][c].real += C; zdet = det_su3(&(s->tempmat1)); znum = numer(C, ztr, zcof); CDIV(znum, zdet, zd); CSUM(density, zd); /* store n_quark probabilities at this site in lattice variable qprob[], accumulate sum over lattice in p[] */ cc = cmplx(C*C*C,0.0); CDIV(cc,zdet,s->qprob[0]); CSUM(p[0],s->qprob[0]); CMULREAL(ztr,C*C,cc); CDIV(cc,zdet,s->qprob[1]); CSUM(p[1],s->qprob[1]); CMULREAL(zcof,C,cc); CDIV(cc,zdet,s->qprob[2]); CSUM(p[2],s->qprob[2]); cc = cmplx(1.0,0.0); CDIV(cc,zdet,s->qprob[3]); CSUM(p[3],s->qprob[3]); locphase = carg(&zdet); phase += locphase; } g_floatsum( &phase ); g_complexsum( &density ); g_complexsum( &p[0] ); g_complexsum( &p[1] ); g_complexsum( &p[2] ); g_complexsum( &p[3] ); g_complexsum( &plp ); CDIVREAL(density,(Real)(nx*ny*nz),density); CDIVREAL(p[0],(Real)(nx*ny*nz),p[0]); CDIVREAL(p[1],(Real)(nx*ny*nz),p[1]); CDIVREAL(p[2],(Real)(nx*ny*nz),p[2]); CDIVREAL(p[3],(Real)(nx*ny*nz),p[3]); CDIVREAL(plp,(Real)(nx*ny*nz),plp); zphase = ce_itheta(phase); if(this_node == 0) { printf("ZMES\t%e\t%e\t%e\t%e\t%e\t%e\n", zphase.real, zphase.imag, density.real, density.imag, plp.real, plp.imag); printf("PMES\t%e\t%e\t%e\t%e\t%e\t%e\t%e\t%e\n", p[0].real, p[0].imag, p[1].real, p[1].imag, p[2].real, p[2].imag, p[3].real, p[3].imag ); } #ifdef PPCORR dx=1; dy=0; dz=0; /* Temporary - right now we just do nearest neighbor */ for(dir=XUP;dir<=ZUP;dir++){ tag = start_gather_site( F_OFFSET(qprob[0]), 4*sizeof(complex), dir, EVENANDODD, gen_pt[0] ); wait_gather(tag); FORALLSITES(i,s)if(s->t==nt-1){ for(j=0;j<4;j++)for(k=0;k<4;k++){ CMUL( (s->qprob)[j],((complex *)gen_pt[0][i])[k],cc); CSUM(pp[j][k],cc); } } cleanup_gather(tag); } /* density correlation format: PP dx dy dz n1 n2 real imag */ for(j=0;j<4;j++)for(k=0;k<4;k++){ g_complexsum( &pp[j][k] ); CDIVREAL(pp[j][k],(Real)(3*nx*ny*nz),pp[j][k]); if(this_node==0) printf("PP %d %d %d %d %d %e %e\n",dx,dy,dz,j,k, pp[j][k].real,pp[j][k].imag); } #endif /*PPCORR*/ }
/* Get the coefficients f of the expansion of exp(iQ) */ static void get_fs_from_Qs( complex f[3], su3_matrix *Q, su3_matrix *QQ){ su3_matrix QQQ; Real trQQQ, trQQ, c0, c1; Real c0abs, c0max, theta; Real eps, sqtwo = sqrt(2.); Real u, w, u_sq, w_sq, xi0; Real cosu, sinu, cosw, sin2u, cos2u, ucosu, usinu, ucos2u, usin2u; Real denom, subexp1, subexp2, subexp3, subexp; mult_su3_nn ( Q, QQ, &QQQ ); trQQ = (trace_su3( QQ )).real; trQQQ = (trace_su3( &QQQ )).real; c0 = 1./3. * trQQQ; c1 = 1./2. * trQQ; if( c1 < 4.0e-3 ) { // RGE: set to 4.0e-3 (CM uses this value). I ran into nans with 1.0e-4 // ======================================================================= // // Corner Case 1: if c1 < 1.0e-4 this implies c0max ~ 3x10^-7 // and in this case the division c0/c0max in arccos c0/c0max can be undefined // and produce NaN's // In this case what we can do is get the f-s a different way. We go back to basics: // // We solve (using maple) the matrix equations using the eigenvalues // // [ 1, q_1, q_1^2 ] [ f_0 ] [ exp( iq_1 ) ] // [ 1, q_2, q_2^2 ] [ f_1 ] = [ exp( iq_2 ) ] // [ 1, q_3, q_3^2 ] [ f_2 ] [ exp( iq_3 ) ] // // with q_1 = 2 u w, q_2 = -u + w, q_3 = - u - w // // with u and w defined as u = sqrt( c_1/ 3 ) cos (theta/3) // and w = sqrt( c_1 ) sin (theta/3) // theta = arccos ( c0 / c0max ) // leaving c0max as a symbol. // // we then expand the resulting f_i as a series around c0 = 0 and c1 = 0 // and then substitute in c0max = 2 ( c_1/ 3)^(3/2) // // we then convert the results to polynomials and take the real and imaginary parts: // we get at the end of the day (to low order) // 1 2 // f0[re] := 1 - --- c0 + h.o.t // 720 // // 1 1 1 2 // f0[im] := - - c0 + --- c0 c1 - ---- c0 c1 + h.o.t // 6 120 5040 // // // 1 1 1 2 // f1[re] := -- c0 - --- c0 c1 + ----- c0 c1 + h.o.t // 24 360 13440 f // // 1 1 2 1 3 1 2 // f1[im] := 1 - - c1 + --- c1 - ---- c1 - ---- c0 + h.o.t // 6 120 5040 5040 // // 1 1 1 2 1 3 1 2 // f2[re] := - - + -- c1 - --- c1 + ----- c1 + ----- c0 + h.o.t // 2 24 720 40320 40320 // // 1 1 1 2 // f2[im] := --- c0 - ---- c0 c1 + ------ c0 c1 + h.o.t // 120 2520 120960 // We then express these using Horner's rule for more stable evaluation. // // ===================================================================== f[0].real = 1. - c0*c0/720.; f[0].imag = -(c0/6.)*(1. - (c1/20.)*(1. - (c1/42.))) ; f[1].real = c0/24.*(1. - c1/15.*(1. - 3.*c1/112.)) ; f[1].imag = 1.-c1/6.*(1. - c1/20.*(1. - c1/42.)) - c0*c0/5040. ; f[2].real = 0.5*(-1. + c1/12.*(1. - c1/30.*(1. - c1/56.)) + c0*c0/20160.); f[2].imag = 0.5*(c0/60.*(1. - c1/21.*(1. - c1/48.))); } else { // ======================================================================= // Normal case: Do as per Morningstar-Peardon paper // ======================================================================= c0abs = fabs( c0 ); c0max = 2*pow( c1/3., 1.5); // ======================================================================= // Now work out theta. In the paper the case where c0 -> c0max even when c1 is reasonable // Has never been considered, even though it can arise and can cause the arccos function // to fail // Here we handle it with series expansion // ======================================================================= eps = (c0max - c0abs)/c0max; if( eps < 0 ) { // ===================================================================== // Corner Case 2: Handle case when c0abs is bigger than c0max. // This can happen only when there is a rounding error in the ratio, and that the // ratio is really 1. This implies theta = 0 which we'll just set. // ===================================================================== theta = 0; } else if ( eps < 1.0e-3 ) { // ===================================================================== // Corner Case 3: c0->c0max even though c1 may be actually quite reasonable. // The ratio |c0|/c0max -> 1 but is still less than one, so that a // series expansion is possible. // SERIES of acos(1-epsilon): Good to O(eps^6) or with this cutoff to O(10^{-18}) Computed with Maple. // BTW: 1-epsilon = 1 - (c0max-c0abs)/c0max = 1-(1 - c0abs/c0max) = +c0abs/c0max // // ====================================================================== theta = sqtwo*sqrt(eps)*( 1 + ( (1./12.) + ( (3./160.) + ( (5./896.) + ( (35./18432.) + (63./90112.)*eps ) *eps) *eps) *eps) *eps); } else { theta = acos( c0abs/c0max ); } u = sqrt(c1/3.)*cos(theta/3.); w = sqrt(c1)*sin(theta/3.); u_sq = u*u; w_sq = w*w; if( fabs(w) < 0.05 ) { xi0 = 1. - (1./6.)*w_sq*( 1. - (1./20.)*w_sq*( 1. - (1./42.)*w_sq ) ); } else { xi0 = sin(w)/w; } cosu = cos(u); sinu = sin(u); cosw = cos(w); sin2u = sin(2*u); cos2u = cos(2*u); ucosu = u*cosu; usinu = u*sinu; ucos2u = u*cos2u; usin2u = u*sin2u; denom = 9.*u_sq - w_sq; subexp1 = u_sq - w_sq; subexp2 = 8*u_sq*cosw; subexp3 = (3*u_sq + w_sq)*xi0; f[0].real = ( (subexp1)*cos2u + cosu*subexp2 + 2*usinu*subexp3 ) / denom ; f[0].imag = ( (subexp1)*sin2u - sinu*subexp2 + 2*ucosu*subexp3 ) / denom ; subexp = (3*u_sq -w_sq)*xi0; f[1].real = (2*(ucos2u - ucosu*cosw)+subexp*sinu)/denom; f[1].imag = (2*(usin2u + usinu*cosw)+subexp*cosu)/denom; subexp=3*xi0; f[2].real = (cos2u - cosu*cosw -usinu*subexp) /denom ; f[2].imag = (sin2u + sinu*cosw -ucosu*subexp) /denom ; if( c0 < 0 ) { // f[0] = conj(f[0]); f[0].imag *= -1; //f[1] = -conj(f[1]); f[1].real *= -1; //f[2] = conj(f[2]); f[2].imag *= -1; } } // End of if( corner_caseP ) else {} }
void d_plaquette_field_hist(su3_matrix **U_field, int Npowers, int *Nhist, double **hist, double **hist_bounds, double *ss_plaq, double *st_plaq) { /* su3mat is scratch space of size su3_matrix */ su3_matrix *su3mat; register int i,dir1,dir2; register int ipower,ihist; register site *s; register su3_matrix *m1,*m4; double *plaq_power, *step_hist; su3_matrix mtmp; double ss_sum,st_sum; double rtrace, rtrace3; msg_tag *mtag0,*mtag1; ss_sum = st_sum = 0.0; #ifdef HISQ_DUMP_PLAQ_INTO_FILE FILE *fp; char plaq_file_name[300]; #endif /* HISQ_DUMP_PLAQ_INTO_FILE */ su3mat = (su3_matrix *)malloc(sizeof(su3_matrix)*sites_on_node); if(su3mat == NULL) { printf("plaquette: can't malloc su3mat\n"); fflush(stdout); terminate(1); } /* zero out the histogram */ for(ipower=0;ipower<Npowers;ipower++) { for(ihist=0;ihist<Nhist[ipower];ihist++) { hist[ipower][ihist]=0.0; } } /* array with powers of (3-plaquette) */ plaq_power=(double*)malloc(sizeof(double)*Npowers); /* array with step sizes */ step_hist=(double*)malloc(sizeof(double)*Npowers); for(ipower=0;ipower<Npowers;ipower++) { step_hist[ipower]= (hist_bounds[ipower][1]-hist_bounds[ipower][0])/Nhist[ipower]; } #ifdef HISQ_DUMP_PLAQ_INTO_FILE sprintf( plaq_file_name, "plaq_W_node%04d.dat", this_node ); fp = fopen( plaq_file_name, "wt" ); #endif /* HISQ_DUMP_PLAQ_INTO_FILE */ for(dir1=YUP;dir1<=TUP;dir1++){ for(dir2=XUP;dir2<dir1;dir2++){ mtag0 = start_gather_field( U_field[dir2], sizeof(su3_matrix), dir1, EVENANDODD, gen_pt[0] ); mtag1 = start_gather_field( U_field[dir1], sizeof(su3_matrix), dir2, EVENANDODD, gen_pt[1] ); FORALLSITES(i,s){ m1 = &(U_field[dir1][i]); m4 = &(U_field[dir2][i]); mult_su3_an(m4,m1,&su3mat[i]); } wait_gather(mtag0); wait_gather(mtag1); FORALLSITES(i,s){ mult_su3_nn( &su3mat[i], (su3_matrix *)(gen_pt[0][i]), &mtmp); if(dir1==TUP ) { rtrace = (double) realtrace_su3((su3_matrix *)(gen_pt[1][i]),&mtmp); st_sum += rtrace; } else { rtrace = (double) realtrace_su3((su3_matrix *)(gen_pt[1][i]),&mtmp); ss_sum += rtrace; } // printf("Plaq i=%d, dir1=%d, dir2=%d: %f %f\n", // i,dir1,dir2,rtrace_s,rtrace_t); #ifdef HISQ_DUMP_PLAQ_INTO_FILE fprintf( fp, "%18.12g\n", rtrace ); #endif /* HISQ_DUMP_PLAQ_INTO_FILE */ /* powers of (3-plaquette) */ rtrace3=3.0-rtrace; plaq_power[0]=rtrace3; for(ipower=1;ipower<Npowers;ipower++) { plaq_power[ipower]=plaq_power[ipower-1]*rtrace3; } /* find histogram entry */ for(ipower=0;ipower<Npowers;ipower++) { if( (plaq_power[ipower]>hist_bounds[ipower][0]) && (plaq_power[ipower]<hist_bounds[ipower][1]) ) { ihist=(int)( (plaq_power[ipower]-hist_bounds[ipower][0])/ step_hist[ipower] ); hist[ipower][ihist]+=1.0; } } } cleanup_gather(mtag0); cleanup_gather(mtag1); }
static void get_fs_and_bs_from_Qs( complex f[3], complex b1[3], complex b2[3], su3_matrix *Q, su3_matrix *QQ, int do_bs ) { su3_matrix QQQ; Real trQQQ, trQQ, c0, c1; Real c0abs, c0max, theta; Real eps, sqtwo = sqrt(2.); Real u, w, u_sq, w_sq, xi0, xi1; Real cosu, sinu, cosw, sinw, sin2u, cos2u, ucosu, usinu, ucos2u, usin2u; Real denom; Real r_1_re[3], r_1_im[3], r_2_re[3], r_2_im[3]; Real b_denom; mult_su3_nn ( Q, QQ, &QQQ ); trQQ = (trace_su3( QQ )).real; trQQQ = (trace_su3( &QQQ )).real; c0 = 1./3. * trQQQ; c1 = 1./2. * trQQ; if( c1 < 4.0e-3 ) { // RGE: set to 4.0e-3 (CM uses this value). I ran into nans with 1.0e-4 // ======================================================================= // // Corner Case 1: if c1 < 1.0e-4 this implies c0max ~ 3x10^-7 // and in this case the division c0/c0max in arccos c0/c0max can be undefined // and produce NaN's // In this case what we can do is get the f-s a different way. We go back to basics: // // We solve (using maple) the matrix equations using the eigenvalues // // [ 1, q_1, q_1^2 ] [ f_0 ] [ exp( iq_1 ) ] // [ 1, q_2, q_2^2 ] [ f_1 ] = [ exp( iq_2 ) ] // [ 1, q_3, q_3^2 ] [ f_2 ] [ exp( iq_3 ) ] // // with q_1 = 2 u w, q_2 = -u + w, q_3 = - u - w // // with u and w defined as u = sqrt( c_1/ 3 ) cos (theta/3) // and w = sqrt( c_1 ) sin (theta/3) // theta = arccos ( c0 / c0max ) // leaving c0max as a symbol. // // we then expand the resulting f_i as a series around c0 = 0 and c1 = 0 // and then substitute in c0max = 2 ( c_1/ 3)^(3/2) // // we then convert the results to polynomials and take the real and imaginary parts: // we get at the end of the day (to low order) // 1 2 // f0[re] := 1 - --- c0 + h.o.t // 720 // // 1 1 1 2 // f0[im] := - - c0 + --- c0 c1 - ---- c0 c1 + h.o.t // 6 120 5040 // // // 1 1 1 2 // f1[re] := -- c0 - --- c0 c1 + ----- c0 c1 + h.o.t // 24 360 13440 f // // 1 1 2 1 3 1 2 // f1[im] := 1 - - c1 + --- c1 - ---- c1 - ---- c0 + h.o.t // 6 120 5040 5040 // // 1 1 1 2 1 3 1 2 // f2[re] := - - + -- c1 - --- c1 + ----- c1 + ----- c0 + h.o.t // 2 24 720 40320 40320 // // 1 1 1 2 // f2[im] := --- c0 - ---- c0 c1 + ------ c0 c1 + h.o.t // 120 2520 120960 // We then express these using Horner's rule for more stable evaluation. // // to get the b-s we use the fact that // b2_i = d f_i / d c0 // and b1_i = d f_i / d c1 // // where the derivatives are partial derivatives // // And we just differentiate the polynomials above (keeping the same level // of truncation) and reexpress that as Horner's rule // // This clearly also handles the case of a unit gauge as no c1, u etc appears in the // denominator and the arccos is never taken. In this case, we have the results in // the raw c0, c1 form and we don't need to flip signs and take complex conjugates. // // (not CD) I checked the expressions below by taking the difference between the Horner forms // below from the expanded forms (and their derivatives) above and checking for the // differences to be zero. At this point in time maple seems happy. // ================================================================== f[0].real = 1. - c0*c0/720.; f[0].imag = -(c0/6.)*(1. - (c1/20.)*(1. - (c1/42.))) ; f[1].real = c0/24.*(1. - c1/15.*(1. - 3.*c1/112.)) ; f[1].imag = 1.-c1/6.*(1. - c1/20.*(1. - c1/42.)) - c0*c0/5040. ; f[2].real = 0.5*(-1. + c1/12.*(1. - c1/30.*(1. - c1/56.)) + c0*c0/20160.); f[2].imag = 0.5*(c0/60.*(1. - c1/21.*(1. - c1/48.))); if( do_bs ) { // partial f0/ partial c0 b2[0].real = -c0/360.; b2[0].imag = -(1./6.)*(1.-(c1/20.)*(1.-c1/42.)); // partial f0 / partial c1 // b1[0].real = 0; b1[0].imag = (c0/120.)*(1.-c1/21.); // partial f1 / partial c0 // b2[1].real = (1./24.)*(1.-c1/15.*(1.-3.*c1/112.)); b2[1].imag = -c0/2520.; // partial f1 / partial c1 b1[1].real = -c0/360.*(1. - 3.*c1/56. ); b1[1].imag = -1./6.*(1.-c1/10.*(1.-c1/28.)); // partial f2/ partial c0 b2[2].real = 0.5*c0/10080.; b2[2].imag = 0.5*( 1./60.*(1.-c1/21.*(1.-c1/48.)) ); // partial f2/ partial c1 b1[2].real = 0.5*( 1./12.*(1.-(2.*c1/30.)*(1.-3.*c1/112.)) ); b1[2].imag = 0.5*( -c0/1260.*(1.-c1/24.) ); } // do_bs } else { // ======================================================================= // Normal case: Do as per Morningstar-Peardon paper // ======================================================================= c0abs = fabs( c0 ); c0max = 2*pow( c1/3., 1.5); // ======================================================================= // Now work out theta. In the paper the case where c0 -> c0max even when c1 is reasonable // Has never been considered, even though it can arise and can cause the arccos function // to fail // Here we handle it with series expansion // ======================================================================= eps = (c0max - c0abs)/c0max; if( eps < 0 ) { // ===================================================================== // Corner Case 2: Handle case when c0abs is bigger than c0max. // This can happen only when there is a rounding error in the ratio, and that the // ratio is really 1. This implies theta = 0 which we'll just set. // ===================================================================== theta = 0; } else if ( eps < 1.0e-3 ) { // ===================================================================== // Corner Case 3: c0->c0max even though c1 may be actually quite reasonable. // The ratio |c0|/c0max -> 1 but is still less than one, so that a // series expansion is possible. // SERIES of acos(1-epsilon): Good to O(eps^6) or with this cutoff to O(10^{-18}) Computed with Maple. // BTW: 1-epsilon = 1 - (c0max-c0abs)/c0max = 1-(1 - c0abs/c0max) = +c0abs/c0max // // ====================================================================== theta = sqtwo*sqrt(eps)*( 1 + ( (1./12.) + ( (3./160.) + ( (5./896.) + ( (35./18432.) + (63./90112.)*eps ) *eps) *eps) *eps) *eps); } else { // theta = acos( c0abs/c0max ); } u = sqrt(c1/3.)*cos(theta/3.); w = sqrt(c1)*sin(theta/3.); u_sq = u*u; w_sq = w*w; if( fabs(w) < 0.05 ) { xi0 = 1. - (1./6.)*w_sq*( 1. - (1./20.)*w_sq*( 1. - (1./42.)*w_sq ) ); } else { xi0 = sin(w)/w; } if( do_bs) { if( fabs(w) < 0.05 ) { xi1 = -( 1./3. - (1./30.)*w_sq*( 1. - (1./28.)*w_sq*( 1. - (1./54.)*w_sq ) ) ); } else { xi1 = cos(w)/w_sq - sin(w)/(w_sq*w); } } cosu = cos(u); sinu = sin(u); cosw = cos(w); sinw = sin(w); sin2u = sin(2*u); cos2u = cos(2*u); ucosu = u*cosu; usinu = u*sinu; ucos2u = u*cos2u; usin2u = u*sin2u; denom = 9.*u_sq - w_sq; { Real subexp1, subexp2, subexp3; subexp1 = u_sq - w_sq; subexp2 = 8*u_sq*cosw; subexp3 = (3*u_sq + w_sq)*xi0; f[0].real = ( (subexp1)*cos2u + cosu*subexp2 + 2*usinu*subexp3 ) / denom ; f[0].imag = ( (subexp1)*sin2u - sinu*subexp2 + 2*ucosu*subexp3 ) / denom ; } { Real subexp; subexp = (3*u_sq -w_sq)*xi0; f[1].real = (2*(ucos2u - ucosu*cosw)+subexp*sinu)/denom; f[1].imag = (2*(usin2u + usinu*cosw)+subexp*cosu)/denom; } { Real subexp; subexp=3*xi0; f[2].real = (cos2u - cosu*cosw -usinu*subexp) /denom ; f[2].imag = (sin2u + sinu*cosw -ucosu*subexp) /denom ; } if( do_bs ) { { Real subexp1, subexp2, subexp3; // r_1[0]=Double(2)*cmplx(u, u_sq-w_sq)*exp2iu // + 2.0*expmiu*( cmplx(8.0*u*cosw, -4.0*u_sq*cosw) // + cmplx(u*(3.0*u_sq+w_sq),9.0*u_sq+w_sq)*xi0 ); subexp1 = u_sq - w_sq; subexp2 = 8.*cosw + (3.*u_sq + w_sq)*xi0 ; subexp3 = 4.*u_sq*cosw - (9.*u_sq + w_sq)*xi0 ; r_1_re[0] = 2.*(ucos2u - sin2u *(subexp1)+ucosu*( subexp2 )- sinu*( subexp3 ) ); r_1_im[0] = 2.*(usin2u + cos2u *(subexp1)-usinu*( subexp2 )- cosu*( subexp3 ) ); } { Real subexp1, subexp2; // r_1[1]=cmplx(2.0, 4.0*u)*exp2iu + expmiu*cmplx(-2.0*cosw-(w_sq-3.0*u_sq)*xi0,2.0*u*cosw+6.0*u*xi0); subexp1 = cosw+3.*xi0; subexp2 = 2.*cosw + xi0*(w_sq - 3.*u_sq); r_1_re[1] = 2.*((cos2u - 2.*usin2u) + usinu*subexp1) - cosu*subexp2; r_1_im[1] = 2.*((sin2u + 2.*ucos2u) + ucosu*subexp1) + sinu*subexp2; } { Real subexp; // r_1[2]=2.0*timesI(exp2iu) +expmiu*cmplx(-3.0*u*xi0, cosw-3*xi0); subexp = cosw - 3.*xi0; r_1_re[2] = -2.*sin2u -3.*ucosu*xi0 + sinu*subexp; r_1_im[2] = 2.*cos2u +3.*usinu*xi0 + cosu*subexp; } { Real subexp; //r_2[0]=-2.0*exp2iu + 2*cmplx(0,u)*expmiu*cmplx(cosw+xi0+3*u_sq*xi1, // 4*u*xi0); subexp = cosw + xi0 + 3.*u_sq*xi1; r_2_re[0] = -2.*(cos2u + u*( 4.*ucosu*xi0 - sinu*subexp) ); r_2_im[0] = -2.*(sin2u - u*( 4.*usinu*xi0 + cosu*subexp) ); } { Real subexp; // r_2[1]= expmiu*cmplx(cosw+xi0-3.0*u_sq*xi1, 2.0*u*xi0); // r_2[1] = timesMinusI(r_2[1]); subexp = cosw + xi0 - 3.*u_sq*xi1; r_2_re[1] = 2.*ucosu*xi0 - sinu*subexp; r_2_im[1] = -2.*usinu*xi0 - cosu*subexp; } { Real subexp; //r_2[2]=expmiu*cmplx(xi0, -3.0*u*xi1); subexp = 3.*xi1; r_2_re[2] = cosu*xi0 - usinu*subexp ; r_2_im[2] = -( sinu*xi0 + ucosu*subexp ) ; } b_denom=2.*denom*denom; { Real subexp1, subexp2, subexp3; int j; subexp1 = 2.*u; subexp2 = 3.*u_sq - w_sq; subexp3 = 2.*(15.*u_sq + w_sq); for(j=0; j < 3; j++) { b1[j].real=( subexp1*r_1_re[j] + subexp2*r_2_re[j] - subexp3*f[j].real )/b_denom; b1[j].imag=( subexp1*r_1_im[j] + subexp2*r_2_im[j] - subexp3*f[j].imag )/b_denom; } } { Real subexp1, subexp2; int j; subexp1 = 3.*u; subexp2 = 24.*u; for(j=0; j < 3; j++) { b2[j].real=( r_1_re[j] - subexp1*r_2_re[j] - subexp2 * f[j].real )/b_denom; b2[j].imag=( r_1_im[j] - subexp1*r_2_im[j] - subexp2 * f[j].imag )/b_denom; } } // Now flip the coefficients of the b-s if( c0 < 0 ) { //b1_site[0] = conj(b1_site[0]); b1[0].imag *= -1; //b1_site[1] = -conj(b1_site[1]); b1[1].real *= -1; //b1_site[2] = conj(b1_site[2]); b1[2].imag *= -1; //b2_site[0] = -conj(b2_site[0]); b2[0].real *= -1; //b2_site[1] = conj(b2_site[1]); b2[1].imag *= -1; //b2_site[2] = -conj(b2_site[2]); b2[2].real *= -1; } } // end of if (do_bs) // Now when everything is done flip signs of the b-s (can't do this before // as the unflipped f-s are needed to find the b-s if( c0 < 0 ) { // f[0] = conj(f[0]); f[0].imag *= -1; //f[1] = -conj(f[1]); f[1].real *= -1; //f[2] = conj(f[2]); f[2].imag *= -1; } } // End of if( corner_caseP ) else {} }
FORALLSITES(i,s){ mult_su3_nn( (su3_matrix *)(gen_pt[0][i]), &LINK(dir1), &temp1[i] ); mult_su3_nn( (su3_matrix *)(gen_pt[1][i]), &LINK(dir0), &temp2[i] ); }
/* Now make +mu +nu plaquette and put in f_mn */ FORALLSITES(i,s){ mult_su3_nn( &(s->link[mu]), ((su3_matrix *)F_PT(s,f_mn)), &tmat4 ); mult_su3_na( &tmat4, &(s->link[nu]), ((su3_matrix *)F_PT(s,f_mn)) ); }
FORALLSITES(i,s){ mult_su3_an( &LINK(dir1), &LINK(dir0), &tmat1); mult_su3_nn( &tmat1, (su3_matrix *)(gen_pt[1][i]), &temp1[i] ); }
/* compute wiggly links in direction dir1 decorated in direction dir2 */ void hyp_block_stage1(register int dir1, register int dir2, int parity, su3_matrix *U_link, su3_matrix *Wiggly_link, int dir_exclude, hyp_coeffs_t *hc ) { register int i; register site *st; msg_tag *tag0,*tag1,*tag2,*tag3,*tag4; int start; register int count,nWiggly; su3_matrix tmat1,tmat2,fatq; su3_matrix *tempmat1; int disp[4]; /* displacement vector for general gather */ /* create temporary storage, one matrix per site */ tempmat1 = create_mn_special(1); start=1; /* indicates staple sum not initialized */ // array size is fixed for 4D, in 3D not all entries are filled nWiggly = 12; count=3*dir1+dir2; if(dir2>dir1)count=count-1; /* displacement vector for link 2 sites away */ for(i=XUP;i<=TUP;i++)disp[i]=0; disp[dir1] = 1; disp[dir2] = -1; /* get U_link[dir2] from direction dir1 */ tag0 = declare_strided_gather( U_link + dir2, 4*sizeof(su3_matrix), sizeof(su3_matrix), dir1, parity, gen_pt[0] ); do_gather( tag0 ); /* get U_link[dir1] from direction dir2 */ tag1 = declare_strided_gather( U_link + dir1, 4*sizeof(su3_matrix), sizeof(su3_matrix), dir2, parity, gen_pt[1] ); do_gather( tag1 ); /* get U_link[dir2] from direction -dir2 */ tag2 = declare_strided_gather( U_link + dir2, 4*sizeof(su3_matrix), sizeof(su3_matrix), OPP_DIR(dir2), parity, gen_pt[2] ); do_gather( tag2 ); /* get U_link[dir1] from direction -dir2 */ tag3 = declare_strided_gather( U_link + dir1, 4*sizeof(su3_matrix), sizeof(su3_matrix), OPP_DIR(dir2), parity, gen_pt[3] ); do_gather( tag3 ); /* get U_link[dir2] from displacement +dir1-dir2 */ tag4 = start_general_strided_gather( (char *)(U_link + dir2), 4*sizeof(su3_matrix), sizeof(su3_matrix), disp, parity, gen_pt[4] ); /* Upper staple */ wait_gather(tag0); wait_gather(tag1); if(start){ /* this is the first contribution to staple */ FORSOMEPARITY(i,st,parity){ mult_su3_nn( &(U_link[4*i+dir2]), (su3_matrix *)gen_pt[1][i], &tmat1 ); mult_su3_na( &tmat1, (su3_matrix *)gen_pt[0][i], &(tempmat1[i]) ); } start=0; }
void make_field_strength( field_offset link_src, /* field offset for su3_matrix[4] type for the source link matrices */ field_offset field_dest /* field offset for su3_matrix[6] type for the resulting field strength */ ) { register int i,component,dir0=-99,dir1=-99; register site *s; int j; su3_matrix tmat1,tmat2; su3_matrix *temp1,*temp2; complex cc; msg_tag *mtag0,*mtag1; /* Allocate temporary space for two su3_matrix fields */ temp1 = (su3_matrix *)malloc(sites_on_node*sizeof(su3_matrix)); if(temp1 == NULL){ printf("field_strength: No room for temp1\n"); terminate(1); } temp2 = (su3_matrix *)malloc(sites_on_node*sizeof(su3_matrix)); if(temp2 == NULL){ printf("field_strength: No room for temp2\n"); terminate(1); } for(component=FS_XY;component<=FS_ZT;component++){ switch(component){ case FS_XY: dir0=XUP; dir1=YUP; break; case FS_XZ: dir0=XUP; dir1=ZUP; break; case FS_YZ: dir0=YUP; dir1=ZUP; break; case FS_XT: dir0=XUP; dir1=TUP; break; case FS_YT: dir0=YUP; dir1=TUP; break; case FS_ZT: dir0=ZUP; dir1=TUP; break; } /* Plaquette in +dir0 +dir1 direction */ mtag0 = start_gather_site( LINK_OFFSET(dir0), sizeof(su3_matrix), dir1, EVENANDODD, gen_pt[0] ); mtag1 = start_gather_site( LINK_OFFSET(dir1), sizeof(su3_matrix), dir0, EVENANDODD, gen_pt[1] ); wait_gather(mtag0); wait_gather(mtag1); FORALLSITES(i,s){ mult_su3_nn( &LINK(dir0), (su3_matrix *)(gen_pt[1][i]), &tmat1 ); mult_su3_na( &tmat1, (su3_matrix *)(gen_pt[0][i]), &tmat2 ); mult_su3_na( &tmat2, &LINK(dir1), &tmat1 ); su3_adjoint( &tmat1, &tmat2 ); sub_su3_matrix( &tmat1, &tmat2, &FIELD_STRENGTH(component) ); } /**cleanup_gather(mtag0); Use same gather in next plaquette**/ cleanup_gather(mtag1); /* Plaquette in -dir0 +dir1 direction */ /**mtag0 = start_gather_site( LINK_OFFSET(dir0), sizeof(su3_matrix), dir1, EVENANDODD, gen_pt[0] ); wait_gather(mtag0); Already gathered above**/ FORALLSITES(i,s){ mult_su3_an( &LINK(dir1), &LINK(dir0), &tmat1 ); mult_su3_an( (su3_matrix *)(gen_pt[0][i]), &tmat1, &temp1[i] ); }
void stout_force_terms(su3_matrix *force_diag, su3_matrix *ILambda, su3_matrix *V, su3_matrix *U, su3_matrix *force_W) { su3_matrix Q, QQ, USigp; su3_matrix Gamma; complex f[3], b1[3], b2[3]; complex plusI = cmplx(0.,1.); int do_bs = 1; get_Q_from_VUadj( &Q, V, U); mult_su3_nn( &Q, &Q, &QQ ); get_fs_and_bs_from_Qs( f, b1, b2, &Q, &QQ, do_bs); { int i, j; printf("Result Q\n"); for(i = 0; i < 3; i++){ for(j = 0; j < 3; j++) printf("%f + %f*I, ",Q.e[i][j].real, Q.e[i][j].imag); printf("\n"); } } { su3_matrix tmp; /* tmp = exp(iQ) */ quadr_comb( &tmp, &Q, &QQ, f); { int i, j; printf("Result exp(iQ)\n"); for(i = 0; i < 3; i++){ for(j = 0; j < 3; j++) printf("%f + %f*I, ",tmp.e[i][j].real, tmp.e[i][j].imag); printf("\n"); } } /* Note force_W is U' Sigma' = exp(iQ) U Sigma' in MP notation */ /* force_diag = exp(-iQ) * force_W * exp(iQ) = first term in MP (75) */ mult_su3_an( &tmp, force_W, &USigp ); mult_su3_nn( &USigp, &tmp, force_diag ); } /* Construction of Gamma from MP Eq (74) */ { complex tr[3]; su3_matrix B1, B2; /* B1 = b1[0] + b1[1]*Q + b1[2]*Q^2 */ /* B2 = b2[0] + b2[1]*Q + b2[2]*Q^2 */ quadr_comb( &B1, &Q, &QQ, b1 ); quadr_comb( &B2, &Q, &QQ, b2 ); /* Gamma = tr(U * Sigma' * B1) Q + tr(U * Sigma' * B2) Q^2 */ tr[0] = cmplx(0,0); tr[1] = complextrace_su3_nn( &USigp, &B1); tr[2] = complextrace_su3_nn( &USigp, &B2); quadr_comb( &Gamma, &Q, &QQ, tr ); } { su3_matrix tmp; /* Gamma += f_1 * U * Sigma' */ c_scalar_mult_add_su3mat( &Gamma, &USigp, &f[1], &Gamma); /* Gamma += f_2 * Q * U * Sigma' */ mult_su3_nn( &Q, &USigp, &tmp ); c_scalar_mult_add_su3mat( &Gamma, &tmp, &f[2], &Gamma); /* Gamma += f_2 * U * Sigma' * Q */ mult_su3_nn( &USigp, &Q, &tmp ); c_scalar_mult_add_su3mat( &Gamma, &tmp, &f[2], &Gamma); } /* Lambda is the traceless-hermitian part of I*Gamma */ traceless_hermitian_su3( ILambda, &Gamma); /* Multiply by I to make traceless antihermitian */ c_scalar_mult_su3mat( ILambda, &plusI, ILambda ); } /* stout_force_terms */