/* do measurements: load density, ploop, etc. and phases onto lattice */ void measure() { register int i,j,k, c, is_even; register site *s; int dx,dy,dz; /* separation for correlated observables */ int dir; /* direction of separation */ msg_tag *tag; register complex cc,dd; /*scratch*/ complex ztr, zcof, znum, zdet, TC, zd, density, zphase; complex p[4]; /* probabilities of n quarks at a site */ complex np[4]; /* probabilities at neighbor site */ complex pp[4][4]; /* joint probabilities of n here and m there */ complex zplp, plp_even, plp_odd; Real locphase, phase; /* First make T (= timelike P-loop) from s->ploop_t T stored in s->tempmat1 */ ploop_less_slice(nt-1,EVEN); ploop_less_slice(nt-1,ODD); phase = 0.; density = plp_even = plp_odd = cmplx(0.0, 0.0); for(j=0;j<4;j++){ p[j]=cmplx(0.0,0.0); for(k=0;k<4;k++)pp[j][k]=cmplx(0.0,0.0); } FORALLSITES(i,s) { if(s->t != nt-1) continue; if( ((s->x+s->y+s->z)&0x1)==0 ) is_even=1; else is_even=0; mult_su3_nn(&(s->link[TUP]), &(s->ploop_t), &(s->tempmat1)); zplp = trace_su3(&(s->tempmat1)); if( is_even){CSUM(plp_even, zplp)} else {CSUM(plp_odd, zplp)} ztr = trace_su3(&(s->tempmat1)); CONJG(ztr, zcof); if(is_even){ for(c=0; c<3; ++c) s->tempmat1.e[c][c].real += C; zdet = det_su3(&(s->tempmat1)); znum = numer(C, ztr, zcof); CDIV(znum, zdet, zd); CSUM(density, zd); /* store n_quark probabilities at this site in lattice variable qprob[], accumulate sum over lattice in p[] */ cc= cmplx(C*C*C,0.0); CDIV(cc,zdet,s->qprob[0]); CSUM(p[0],s->qprob[0]); CMULREAL(ztr,C*C,cc); CDIV(cc,zdet,s->qprob[1]); CSUM(p[1],s->qprob[1]); CMULREAL(zcof,C,cc); CDIV(cc,zdet,s->qprob[2]); CSUM(p[2],s->qprob[2]); cc = cmplx(1.0,0.0); CDIV(cc,zdet,s->qprob[3]); CSUM(p[3],s->qprob[3]); locphase = carg(&zdet); phase += locphase; } }
/* Make traceless */ FORALLSITES(i,s){ cc = trace_su3(&FIELD_STRENGTH(component)); CMULREAL(cc,0.33333333333333333,cc); for(j=0;j<3;j++) CSUB(FIELD_STRENGTH(component).e[j][j],cc, FIELD_STRENGTH(component).e[j][j]); }
double imp_gauge_action() { register int i; int rep; register site *s; complex trace; double g_action; double action,act2,total_action; su3_matrix *tempmat1; int length; /* these are for loop_table */ int ln,iloop; g_action=0.0; tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(tempmat1 == NULL){ printf("imp_gauge_action: Can't malloc temporary\n"); terminate(1); } /* gauge action */ for(iloop=0;iloop<NLOOP;iloop++){ length=loop_length[iloop]; /* loop over rotations and reflections */ for(ln=0;ln<loop_num[iloop];ln++){ path_product( loop_table[iloop][ln] , length, tempmat1 ); FORALLSITES(i,s){ trace=trace_su3( &tempmat1[i] ); action = 3.0 - (double)trace.real; /* need the "3 -" for higher characters */ total_action= (double)loop_coeff[iloop][0]*action; act2=action; for(rep=1;rep<NREPS;rep++){ act2 *= action; total_action += (double)loop_coeff[iloop][rep]*act2; } g_action += total_action; } END_LOOP /* sites */ } /* ln */ } /* iloop */
/* a = traceless-hermitian part of b. b and a may be equivalent. */ static void traceless_hermitian_su3(su3_matrix *a, su3_matrix *b) { complex t; su3_matrix c; su3_adjoint( b, &c ); add_su3_matrix( &c, b, a ); t = trace_su3( a ); CDIVREAL(t, 3., t); CSUB(a->e[0][0],t,a->e[0][0]); CSUB(a->e[1][1],t,a->e[1][1]); CSUB(a->e[2][2],t,a->e[2][2]); scalar_mult_su3_matrix( a, 0.5, a ); }
double imp_gauge_action() { register int i; int rep; register site *s; complex trace; double g_action; double action,act2,total_action; int length; /* these are for loop_table */ int ln,iloop; g_action=0.0; /* gauge action */ for(iloop=0;iloop<NLOOP;iloop++){ length=loop_length[iloop]; /* loop over rotations and reflections */ for(ln=0;ln<loop_num[iloop];ln++){ path_product( loop_table[iloop][ln] , length ); FORALLSITES(i,s){ trace=trace_su3( &s->tempmat1 ); action = 3.0 - (double)trace.real; /* need the "3 -" for higher characters */ total_action= (double)loop_coeff[iloop][0]*action; act2=action; for(rep=1;rep<NREPS;rep++){ act2 *= action; total_action += (double)loop_coeff[iloop][rep]*act2; } g_action += total_action; } END_LOOP /* sites */ } /* ln */ } /* iloop */
static void get_Q_from_VUadj(su3_matrix *Q, su3_matrix *V, su3_matrix *U){ complex x; complex tr; su3_matrix Om; x = cmplx(0, 0.5); /* i/2 */ /* Om = V U^adj */ mult_su3_na( V, U, &Om ); /* Q = i/2(Om^adj - Om) */ su3_adjoint( &Om, Q ); sub_su3_matrix( Q, &Om, &Om ); c_scalar_mult_su3mat( &Om, &x, Q ); /* Q = Q - Tr Q/3 */ tr = trace_su3( Q ); CDIVREAL(tr, 3., tr); CSUB(Q->e[0][0],tr,Q->e[0][0]); CSUB(Q->e[1][1],tr,Q->e[1][1]); CSUB(Q->e[2][2],tr,Q->e[2][2]); }
void gluon_prop( void ) { register int i,dir; register int pmu; register site *s; anti_hermitmat ahtmp; Real pix, piy, piz, pit; Real sin_pmu, sin_pmu2, prop_s, prop_l, ftmp1, ftmp2; complex ctmp; su3_matrix mat; struct { Real f1, f2; } msg; double trace, dmuAmu; int px, py, pz, pt; int currentnode,newnode; pix = PI / (Real)nx; piy = PI / (Real)ny; piz = PI / (Real)nz; pit = PI / (Real)nt; trace = 0.0; /* Make A_mu as anti-hermition traceless part of U_mu */ /* But store as SU(3) matrix for call to FFT */ for(dir=XUP; dir<=TUP; dir++) { FORALLSITES(i,s){ trace += (double)(trace_su3( &(s->link[dir]))).real; make_anti_hermitian( &(s->link[dir]), &ahtmp); uncompress_anti_hermitian( &ahtmp, &(s->a_mu[dir])); } g_sync(); /* Now Fourier transform */ restrict_fourier_site(F_OFFSET(a_mu[dir]), sizeof(su3_matrix), FORWARDS); }
/* do measurements: load density, ploop, etc. and phases onto lattice */ void measure() { register int i,j,k, c; register site *s; int dx,dy,dz; /* separation for correlated observables */ int dir; /* direction of separation */ msg_tag *tag; register complex cc,dd; /*scratch*/ complex ztr, zcof, znum, zdet, TC, zd, density, zphase; complex p[4]; /* probabilities of n quarks at a site */ complex np[4]; /* probabilities at neighbor site */ complex pp[4][4]; /* joint probabilities of n here and m there */ complex zplp, plp; Real locphase, phase; /* First make T (= timelike P-loop) from s->ploop_t T stored in s->tempmat1 */ ploop_less_slice(nt-1,EVEN); ploop_less_slice(nt-1,ODD); phase = 0.; density = plp = cmplx(0.0, 0.0); for(j=0;j<4;j++){ p[j]=cmplx(0.0,0.0); for(k=0;k<4;k++)pp[j][k]=cmplx(0.0,0.0); } FORALLSITES(i,s) { if(s->t != nt-1) continue; mult_su3_nn(&(s->link[TUP]), &(s->ploop_t), &(s->tempmat1)); zplp = trace_su3(&(s->tempmat1)); CSUM(plp, zplp); ztr = trace_su3(&(s->tempmat1)); CONJG(ztr, zcof); for(c=0; c<3; ++c) s->tempmat1.e[c][c].real += C; zdet = det_su3(&(s->tempmat1)); znum = numer(C, ztr, zcof); CDIV(znum, zdet, zd); CSUM(density, zd); /* store n_quark probabilities at this site in lattice variable qprob[], accumulate sum over lattice in p[] */ cc = cmplx(C*C*C,0.0); CDIV(cc,zdet,s->qprob[0]); CSUM(p[0],s->qprob[0]); CMULREAL(ztr,C*C,cc); CDIV(cc,zdet,s->qprob[1]); CSUM(p[1],s->qprob[1]); CMULREAL(zcof,C,cc); CDIV(cc,zdet,s->qprob[2]); CSUM(p[2],s->qprob[2]); cc = cmplx(1.0,0.0); CDIV(cc,zdet,s->qprob[3]); CSUM(p[3],s->qprob[3]); locphase = carg(&zdet); phase += locphase; } g_floatsum( &phase ); g_complexsum( &density ); g_complexsum( &p[0] ); g_complexsum( &p[1] ); g_complexsum( &p[2] ); g_complexsum( &p[3] ); g_complexsum( &plp ); CDIVREAL(density,(Real)(nx*ny*nz),density); CDIVREAL(p[0],(Real)(nx*ny*nz),p[0]); CDIVREAL(p[1],(Real)(nx*ny*nz),p[1]); CDIVREAL(p[2],(Real)(nx*ny*nz),p[2]); CDIVREAL(p[3],(Real)(nx*ny*nz),p[3]); CDIVREAL(plp,(Real)(nx*ny*nz),plp); zphase = ce_itheta(phase); if(this_node == 0) { printf("ZMES\t%e\t%e\t%e\t%e\t%e\t%e\n", zphase.real, zphase.imag, density.real, density.imag, plp.real, plp.imag); printf("PMES\t%e\t%e\t%e\t%e\t%e\t%e\t%e\t%e\n", p[0].real, p[0].imag, p[1].real, p[1].imag, p[2].real, p[2].imag, p[3].real, p[3].imag ); } #ifdef PPCORR dx=1; dy=0; dz=0; /* Temporary - right now we just do nearest neighbor */ for(dir=XUP;dir<=ZUP;dir++){ tag = start_gather_site( F_OFFSET(qprob[0]), 4*sizeof(complex), dir, EVENANDODD, gen_pt[0] ); wait_gather(tag); FORALLSITES(i,s)if(s->t==nt-1){ for(j=0;j<4;j++)for(k=0;k<4;k++){ CMUL( (s->qprob)[j],((complex *)gen_pt[0][i])[k],cc); CSUM(pp[j][k],cc); } } cleanup_gather(tag); } /* density correlation format: PP dx dy dz n1 n2 real imag */ for(j=0;j<4;j++)for(k=0;k<4;k++){ g_complexsum( &pp[j][k] ); CDIVREAL(pp[j][k],(Real)(3*nx*ny*nz),pp[j][k]); if(this_node==0) printf("PP %d %d %d %d %d %e %e\n",dx,dy,dz,j,k, pp[j][k].real,pp[j][k].imag); } #endif /*PPCORR*/ }
/* Get the coefficients f of the expansion of exp(iQ) */ static void get_fs_from_Qs( complex f[3], su3_matrix *Q, su3_matrix *QQ){ su3_matrix QQQ; Real trQQQ, trQQ, c0, c1; Real c0abs, c0max, theta; Real eps, sqtwo = sqrt(2.); Real u, w, u_sq, w_sq, xi0; Real cosu, sinu, cosw, sin2u, cos2u, ucosu, usinu, ucos2u, usin2u; Real denom, subexp1, subexp2, subexp3, subexp; mult_su3_nn ( Q, QQ, &QQQ ); trQQ = (trace_su3( QQ )).real; trQQQ = (trace_su3( &QQQ )).real; c0 = 1./3. * trQQQ; c1 = 1./2. * trQQ; if( c1 < 4.0e-3 ) { // RGE: set to 4.0e-3 (CM uses this value). I ran into nans with 1.0e-4 // ======================================================================= // // Corner Case 1: if c1 < 1.0e-4 this implies c0max ~ 3x10^-7 // and in this case the division c0/c0max in arccos c0/c0max can be undefined // and produce NaN's // In this case what we can do is get the f-s a different way. We go back to basics: // // We solve (using maple) the matrix equations using the eigenvalues // // [ 1, q_1, q_1^2 ] [ f_0 ] [ exp( iq_1 ) ] // [ 1, q_2, q_2^2 ] [ f_1 ] = [ exp( iq_2 ) ] // [ 1, q_3, q_3^2 ] [ f_2 ] [ exp( iq_3 ) ] // // with q_1 = 2 u w, q_2 = -u + w, q_3 = - u - w // // with u and w defined as u = sqrt( c_1/ 3 ) cos (theta/3) // and w = sqrt( c_1 ) sin (theta/3) // theta = arccos ( c0 / c0max ) // leaving c0max as a symbol. // // we then expand the resulting f_i as a series around c0 = 0 and c1 = 0 // and then substitute in c0max = 2 ( c_1/ 3)^(3/2) // // we then convert the results to polynomials and take the real and imaginary parts: // we get at the end of the day (to low order) // 1 2 // f0[re] := 1 - --- c0 + h.o.t // 720 // // 1 1 1 2 // f0[im] := - - c0 + --- c0 c1 - ---- c0 c1 + h.o.t // 6 120 5040 // // // 1 1 1 2 // f1[re] := -- c0 - --- c0 c1 + ----- c0 c1 + h.o.t // 24 360 13440 f // // 1 1 2 1 3 1 2 // f1[im] := 1 - - c1 + --- c1 - ---- c1 - ---- c0 + h.o.t // 6 120 5040 5040 // // 1 1 1 2 1 3 1 2 // f2[re] := - - + -- c1 - --- c1 + ----- c1 + ----- c0 + h.o.t // 2 24 720 40320 40320 // // 1 1 1 2 // f2[im] := --- c0 - ---- c0 c1 + ------ c0 c1 + h.o.t // 120 2520 120960 // We then express these using Horner's rule for more stable evaluation. // // ===================================================================== f[0].real = 1. - c0*c0/720.; f[0].imag = -(c0/6.)*(1. - (c1/20.)*(1. - (c1/42.))) ; f[1].real = c0/24.*(1. - c1/15.*(1. - 3.*c1/112.)) ; f[1].imag = 1.-c1/6.*(1. - c1/20.*(1. - c1/42.)) - c0*c0/5040. ; f[2].real = 0.5*(-1. + c1/12.*(1. - c1/30.*(1. - c1/56.)) + c0*c0/20160.); f[2].imag = 0.5*(c0/60.*(1. - c1/21.*(1. - c1/48.))); } else { // ======================================================================= // Normal case: Do as per Morningstar-Peardon paper // ======================================================================= c0abs = fabs( c0 ); c0max = 2*pow( c1/3., 1.5); // ======================================================================= // Now work out theta. In the paper the case where c0 -> c0max even when c1 is reasonable // Has never been considered, even though it can arise and can cause the arccos function // to fail // Here we handle it with series expansion // ======================================================================= eps = (c0max - c0abs)/c0max; if( eps < 0 ) { // ===================================================================== // Corner Case 2: Handle case when c0abs is bigger than c0max. // This can happen only when there is a rounding error in the ratio, and that the // ratio is really 1. This implies theta = 0 which we'll just set. // ===================================================================== theta = 0; } else if ( eps < 1.0e-3 ) { // ===================================================================== // Corner Case 3: c0->c0max even though c1 may be actually quite reasonable. // The ratio |c0|/c0max -> 1 but is still less than one, so that a // series expansion is possible. // SERIES of acos(1-epsilon): Good to O(eps^6) or with this cutoff to O(10^{-18}) Computed with Maple. // BTW: 1-epsilon = 1 - (c0max-c0abs)/c0max = 1-(1 - c0abs/c0max) = +c0abs/c0max // // ====================================================================== theta = sqtwo*sqrt(eps)*( 1 + ( (1./12.) + ( (3./160.) + ( (5./896.) + ( (35./18432.) + (63./90112.)*eps ) *eps) *eps) *eps) *eps); } else { theta = acos( c0abs/c0max ); } u = sqrt(c1/3.)*cos(theta/3.); w = sqrt(c1)*sin(theta/3.); u_sq = u*u; w_sq = w*w; if( fabs(w) < 0.05 ) { xi0 = 1. - (1./6.)*w_sq*( 1. - (1./20.)*w_sq*( 1. - (1./42.)*w_sq ) ); } else { xi0 = sin(w)/w; } cosu = cos(u); sinu = sin(u); cosw = cos(w); sin2u = sin(2*u); cos2u = cos(2*u); ucosu = u*cosu; usinu = u*sinu; ucos2u = u*cos2u; usin2u = u*sin2u; denom = 9.*u_sq - w_sq; subexp1 = u_sq - w_sq; subexp2 = 8*u_sq*cosw; subexp3 = (3*u_sq + w_sq)*xi0; f[0].real = ( (subexp1)*cos2u + cosu*subexp2 + 2*usinu*subexp3 ) / denom ; f[0].imag = ( (subexp1)*sin2u - sinu*subexp2 + 2*ucosu*subexp3 ) / denom ; subexp = (3*u_sq -w_sq)*xi0; f[1].real = (2*(ucos2u - ucosu*cosw)+subexp*sinu)/denom; f[1].imag = (2*(usin2u + usinu*cosw)+subexp*cosu)/denom; subexp=3*xi0; f[2].real = (cos2u - cosu*cosw -usinu*subexp) /denom ; f[2].imag = (sin2u + sinu*cosw -ucosu*subexp) /denom ; if( c0 < 0 ) { // f[0] = conj(f[0]); f[0].imag *= -1; //f[1] = -conj(f[1]); f[1].real *= -1; //f[2] = conj(f[2]); f[2].imag *= -1; } } // End of if( corner_caseP ) else {} }
static complex KS_2pt_trace(su3_matrix * antiquark, wilson_propagator * quark, int * g_snk, int n_snk, int *g_src, int n_src, int *p, site *s) { int t; int my_x; int my_y; int my_z; complex trace; int s0; int c0,c1,i; wilson_propagator temp,temp1; su3_matrix mat, mat1; t = s->t; my_x = s->x; my_y = s->y; my_z = s->z; temp = *quark; //multiply by gamma_snk for(i=0;i<n_snk;i++) for(c0=0;c0<3;c0++){ mult_swv_by_gamma_l( &(temp.c[c0]), &(temp1.c[c0]), g_snk[i]); temp.c[c0] = temp1.c[c0]; } //multiply by Omega field if((t % 2) == 1) for(c0=0;c0<3;c0++){ mult_swv_by_gamma_l( &(temp.c[c0]), &(temp1.c[c0]), TUP); temp.c[c0] = temp1.c[c0]; } if((my_x % 2) == 1) for(c0=0;c0<3;c0++){ mult_swv_by_gamma_l( &(temp.c[c0]), &(temp1.c[c0]), XUP); temp.c[c0] = temp1.c[c0]; } if((my_y % 2) == 1) for(c0=0;c0<3;c0++){ mult_swv_by_gamma_l( &(temp.c[c0]), &(temp1.c[c0]), YUP); temp.c[c0] = temp1.c[c0]; } if((my_z % 2) == 1) for(c0=0;c0<3;c0++){ mult_swv_by_gamma_l( &(temp.c[c0]), &(temp1.c[c0]), ZUP); temp.c[c0] = temp1.c[c0]; } //mulptiply by gamma_src for(c0=0;c0<3;c0++) for(i=0;i<n_src;i++) { mult_swv_by_gamma_l( &(temp.c[c0]), &(temp1.c[c0]), g_src[i]); temp.c[c0] = temp1.c[c0]; } for(c0=0;c0<3;c0++) for(c1=0;c1<3;c1++){ trace.real = 0.0; trace.imag = 0.0; for(s0=0;s0<4;s0++){ trace.real += temp.c[c0].d[s0].d[s0].c[c1].real; trace.imag += temp.c[c0].d[s0].d[s0].c[c1].imag; } mat.e[c0][c1].real = trace.real; mat.e[c0][c1].imag = trace.imag; } mult_su3_na(&mat, antiquark, &mat1); //antiquark is just the staggered prop su3 matrix trace = trace_su3(&mat1); return(trace); }
static void get_fs_and_bs_from_Qs( complex f[3], complex b1[3], complex b2[3], su3_matrix *Q, su3_matrix *QQ, int do_bs ) { su3_matrix QQQ; Real trQQQ, trQQ, c0, c1; Real c0abs, c0max, theta; Real eps, sqtwo = sqrt(2.); Real u, w, u_sq, w_sq, xi0, xi1; Real cosu, sinu, cosw, sinw, sin2u, cos2u, ucosu, usinu, ucos2u, usin2u; Real denom; Real r_1_re[3], r_1_im[3], r_2_re[3], r_2_im[3]; Real b_denom; mult_su3_nn ( Q, QQ, &QQQ ); trQQ = (trace_su3( QQ )).real; trQQQ = (trace_su3( &QQQ )).real; c0 = 1./3. * trQQQ; c1 = 1./2. * trQQ; if( c1 < 4.0e-3 ) { // RGE: set to 4.0e-3 (CM uses this value). I ran into nans with 1.0e-4 // ======================================================================= // // Corner Case 1: if c1 < 1.0e-4 this implies c0max ~ 3x10^-7 // and in this case the division c0/c0max in arccos c0/c0max can be undefined // and produce NaN's // In this case what we can do is get the f-s a different way. We go back to basics: // // We solve (using maple) the matrix equations using the eigenvalues // // [ 1, q_1, q_1^2 ] [ f_0 ] [ exp( iq_1 ) ] // [ 1, q_2, q_2^2 ] [ f_1 ] = [ exp( iq_2 ) ] // [ 1, q_3, q_3^2 ] [ f_2 ] [ exp( iq_3 ) ] // // with q_1 = 2 u w, q_2 = -u + w, q_3 = - u - w // // with u and w defined as u = sqrt( c_1/ 3 ) cos (theta/3) // and w = sqrt( c_1 ) sin (theta/3) // theta = arccos ( c0 / c0max ) // leaving c0max as a symbol. // // we then expand the resulting f_i as a series around c0 = 0 and c1 = 0 // and then substitute in c0max = 2 ( c_1/ 3)^(3/2) // // we then convert the results to polynomials and take the real and imaginary parts: // we get at the end of the day (to low order) // 1 2 // f0[re] := 1 - --- c0 + h.o.t // 720 // // 1 1 1 2 // f0[im] := - - c0 + --- c0 c1 - ---- c0 c1 + h.o.t // 6 120 5040 // // // 1 1 1 2 // f1[re] := -- c0 - --- c0 c1 + ----- c0 c1 + h.o.t // 24 360 13440 f // // 1 1 2 1 3 1 2 // f1[im] := 1 - - c1 + --- c1 - ---- c1 - ---- c0 + h.o.t // 6 120 5040 5040 // // 1 1 1 2 1 3 1 2 // f2[re] := - - + -- c1 - --- c1 + ----- c1 + ----- c0 + h.o.t // 2 24 720 40320 40320 // // 1 1 1 2 // f2[im] := --- c0 - ---- c0 c1 + ------ c0 c1 + h.o.t // 120 2520 120960 // We then express these using Horner's rule for more stable evaluation. // // to get the b-s we use the fact that // b2_i = d f_i / d c0 // and b1_i = d f_i / d c1 // // where the derivatives are partial derivatives // // And we just differentiate the polynomials above (keeping the same level // of truncation) and reexpress that as Horner's rule // // This clearly also handles the case of a unit gauge as no c1, u etc appears in the // denominator and the arccos is never taken. In this case, we have the results in // the raw c0, c1 form and we don't need to flip signs and take complex conjugates. // // (not CD) I checked the expressions below by taking the difference between the Horner forms // below from the expanded forms (and their derivatives) above and checking for the // differences to be zero. At this point in time maple seems happy. // ================================================================== f[0].real = 1. - c0*c0/720.; f[0].imag = -(c0/6.)*(1. - (c1/20.)*(1. - (c1/42.))) ; f[1].real = c0/24.*(1. - c1/15.*(1. - 3.*c1/112.)) ; f[1].imag = 1.-c1/6.*(1. - c1/20.*(1. - c1/42.)) - c0*c0/5040. ; f[2].real = 0.5*(-1. + c1/12.*(1. - c1/30.*(1. - c1/56.)) + c0*c0/20160.); f[2].imag = 0.5*(c0/60.*(1. - c1/21.*(1. - c1/48.))); if( do_bs ) { // partial f0/ partial c0 b2[0].real = -c0/360.; b2[0].imag = -(1./6.)*(1.-(c1/20.)*(1.-c1/42.)); // partial f0 / partial c1 // b1[0].real = 0; b1[0].imag = (c0/120.)*(1.-c1/21.); // partial f1 / partial c0 // b2[1].real = (1./24.)*(1.-c1/15.*(1.-3.*c1/112.)); b2[1].imag = -c0/2520.; // partial f1 / partial c1 b1[1].real = -c0/360.*(1. - 3.*c1/56. ); b1[1].imag = -1./6.*(1.-c1/10.*(1.-c1/28.)); // partial f2/ partial c0 b2[2].real = 0.5*c0/10080.; b2[2].imag = 0.5*( 1./60.*(1.-c1/21.*(1.-c1/48.)) ); // partial f2/ partial c1 b1[2].real = 0.5*( 1./12.*(1.-(2.*c1/30.)*(1.-3.*c1/112.)) ); b1[2].imag = 0.5*( -c0/1260.*(1.-c1/24.) ); } // do_bs } else { // ======================================================================= // Normal case: Do as per Morningstar-Peardon paper // ======================================================================= c0abs = fabs( c0 ); c0max = 2*pow( c1/3., 1.5); // ======================================================================= // Now work out theta. In the paper the case where c0 -> c0max even when c1 is reasonable // Has never been considered, even though it can arise and can cause the arccos function // to fail // Here we handle it with series expansion // ======================================================================= eps = (c0max - c0abs)/c0max; if( eps < 0 ) { // ===================================================================== // Corner Case 2: Handle case when c0abs is bigger than c0max. // This can happen only when there is a rounding error in the ratio, and that the // ratio is really 1. This implies theta = 0 which we'll just set. // ===================================================================== theta = 0; } else if ( eps < 1.0e-3 ) { // ===================================================================== // Corner Case 3: c0->c0max even though c1 may be actually quite reasonable. // The ratio |c0|/c0max -> 1 but is still less than one, so that a // series expansion is possible. // SERIES of acos(1-epsilon): Good to O(eps^6) or with this cutoff to O(10^{-18}) Computed with Maple. // BTW: 1-epsilon = 1 - (c0max-c0abs)/c0max = 1-(1 - c0abs/c0max) = +c0abs/c0max // // ====================================================================== theta = sqtwo*sqrt(eps)*( 1 + ( (1./12.) + ( (3./160.) + ( (5./896.) + ( (35./18432.) + (63./90112.)*eps ) *eps) *eps) *eps) *eps); } else { // theta = acos( c0abs/c0max ); } u = sqrt(c1/3.)*cos(theta/3.); w = sqrt(c1)*sin(theta/3.); u_sq = u*u; w_sq = w*w; if( fabs(w) < 0.05 ) { xi0 = 1. - (1./6.)*w_sq*( 1. - (1./20.)*w_sq*( 1. - (1./42.)*w_sq ) ); } else { xi0 = sin(w)/w; } if( do_bs) { if( fabs(w) < 0.05 ) { xi1 = -( 1./3. - (1./30.)*w_sq*( 1. - (1./28.)*w_sq*( 1. - (1./54.)*w_sq ) ) ); } else { xi1 = cos(w)/w_sq - sin(w)/(w_sq*w); } } cosu = cos(u); sinu = sin(u); cosw = cos(w); sinw = sin(w); sin2u = sin(2*u); cos2u = cos(2*u); ucosu = u*cosu; usinu = u*sinu; ucos2u = u*cos2u; usin2u = u*sin2u; denom = 9.*u_sq - w_sq; { Real subexp1, subexp2, subexp3; subexp1 = u_sq - w_sq; subexp2 = 8*u_sq*cosw; subexp3 = (3*u_sq + w_sq)*xi0; f[0].real = ( (subexp1)*cos2u + cosu*subexp2 + 2*usinu*subexp3 ) / denom ; f[0].imag = ( (subexp1)*sin2u - sinu*subexp2 + 2*ucosu*subexp3 ) / denom ; } { Real subexp; subexp = (3*u_sq -w_sq)*xi0; f[1].real = (2*(ucos2u - ucosu*cosw)+subexp*sinu)/denom; f[1].imag = (2*(usin2u + usinu*cosw)+subexp*cosu)/denom; } { Real subexp; subexp=3*xi0; f[2].real = (cos2u - cosu*cosw -usinu*subexp) /denom ; f[2].imag = (sin2u + sinu*cosw -ucosu*subexp) /denom ; } if( do_bs ) { { Real subexp1, subexp2, subexp3; // r_1[0]=Double(2)*cmplx(u, u_sq-w_sq)*exp2iu // + 2.0*expmiu*( cmplx(8.0*u*cosw, -4.0*u_sq*cosw) // + cmplx(u*(3.0*u_sq+w_sq),9.0*u_sq+w_sq)*xi0 ); subexp1 = u_sq - w_sq; subexp2 = 8.*cosw + (3.*u_sq + w_sq)*xi0 ; subexp3 = 4.*u_sq*cosw - (9.*u_sq + w_sq)*xi0 ; r_1_re[0] = 2.*(ucos2u - sin2u *(subexp1)+ucosu*( subexp2 )- sinu*( subexp3 ) ); r_1_im[0] = 2.*(usin2u + cos2u *(subexp1)-usinu*( subexp2 )- cosu*( subexp3 ) ); } { Real subexp1, subexp2; // r_1[1]=cmplx(2.0, 4.0*u)*exp2iu + expmiu*cmplx(-2.0*cosw-(w_sq-3.0*u_sq)*xi0,2.0*u*cosw+6.0*u*xi0); subexp1 = cosw+3.*xi0; subexp2 = 2.*cosw + xi0*(w_sq - 3.*u_sq); r_1_re[1] = 2.*((cos2u - 2.*usin2u) + usinu*subexp1) - cosu*subexp2; r_1_im[1] = 2.*((sin2u + 2.*ucos2u) + ucosu*subexp1) + sinu*subexp2; } { Real subexp; // r_1[2]=2.0*timesI(exp2iu) +expmiu*cmplx(-3.0*u*xi0, cosw-3*xi0); subexp = cosw - 3.*xi0; r_1_re[2] = -2.*sin2u -3.*ucosu*xi0 + sinu*subexp; r_1_im[2] = 2.*cos2u +3.*usinu*xi0 + cosu*subexp; } { Real subexp; //r_2[0]=-2.0*exp2iu + 2*cmplx(0,u)*expmiu*cmplx(cosw+xi0+3*u_sq*xi1, // 4*u*xi0); subexp = cosw + xi0 + 3.*u_sq*xi1; r_2_re[0] = -2.*(cos2u + u*( 4.*ucosu*xi0 - sinu*subexp) ); r_2_im[0] = -2.*(sin2u - u*( 4.*usinu*xi0 + cosu*subexp) ); } { Real subexp; // r_2[1]= expmiu*cmplx(cosw+xi0-3.0*u_sq*xi1, 2.0*u*xi0); // r_2[1] = timesMinusI(r_2[1]); subexp = cosw + xi0 - 3.*u_sq*xi1; r_2_re[1] = 2.*ucosu*xi0 - sinu*subexp; r_2_im[1] = -2.*usinu*xi0 - cosu*subexp; } { Real subexp; //r_2[2]=expmiu*cmplx(xi0, -3.0*u*xi1); subexp = 3.*xi1; r_2_re[2] = cosu*xi0 - usinu*subexp ; r_2_im[2] = -( sinu*xi0 + ucosu*subexp ) ; } b_denom=2.*denom*denom; { Real subexp1, subexp2, subexp3; int j; subexp1 = 2.*u; subexp2 = 3.*u_sq - w_sq; subexp3 = 2.*(15.*u_sq + w_sq); for(j=0; j < 3; j++) { b1[j].real=( subexp1*r_1_re[j] + subexp2*r_2_re[j] - subexp3*f[j].real )/b_denom; b1[j].imag=( subexp1*r_1_im[j] + subexp2*r_2_im[j] - subexp3*f[j].imag )/b_denom; } } { Real subexp1, subexp2; int j; subexp1 = 3.*u; subexp2 = 24.*u; for(j=0; j < 3; j++) { b2[j].real=( r_1_re[j] - subexp1*r_2_re[j] - subexp2 * f[j].real )/b_denom; b2[j].imag=( r_1_im[j] - subexp1*r_2_im[j] - subexp2 * f[j].imag )/b_denom; } } // Now flip the coefficients of the b-s if( c0 < 0 ) { //b1_site[0] = conj(b1_site[0]); b1[0].imag *= -1; //b1_site[1] = -conj(b1_site[1]); b1[1].real *= -1; //b1_site[2] = conj(b1_site[2]); b1[2].imag *= -1; //b2_site[0] = -conj(b2_site[0]); b2[0].real *= -1; //b2_site[1] = conj(b2_site[1]); b2[1].imag *= -1; //b2_site[2] = -conj(b2_site[2]); b2[2].real *= -1; } } // end of if (do_bs) // Now when everything is done flip signs of the b-s (can't do this before // as the unflipped f-s are needed to find the b-s if( c0 < 0 ) { // f[0] = conj(f[0]); f[0].imag *= -1; //f[1] = -conj(f[1]); f[1].real *= -1; //f[2] = conj(f[2]); f[2].imag *= -1; } } // End of if( corner_caseP ) else {} }