/**
 * Half inverse MDCT built on a 15 x 2^n FFT.
 *
 * Input samples are read from src with element spacing `stride`; the result
 * is written to dst, which is accessed as an array of FFTComplex.
 */
static void imdct15_half(MDCT15Context *s, float *dst, const float *src,
                         ptrdiff_t stride)
{
    FFTComplex col_in[15];
    FFTComplex *out = (FFTComplex *)dst;
    const int mid = s->len4 >> 1;
    const int n_ptwo = 1 << s->ptwo_fft.nbits;
    const float *head = src;
    const float *tail = src + (s->len2 - 1) * stride;
    int blk, col, lo, hi;

    /* Gather and pre-twiddle the input, one 15-point FFT per pass */
    for (blk = 0; blk < n_ptwo; blk++) {
        const int first = blk * 15;

        for (col = 0; col < 15; col++) {
            const int k = s->pfa_prereindex[first + col];
            FFTComplex sample = { *(tail - 2*k*stride), *(head + 2*k*stride) };

            CMUL3(col_in[col], sample, s->twiddle_exptab[k]);
        }
        s->fft15(s->tmp + s->ptwo_fft.revtab[blk], col_in, s->exptab, n_ptwo);
    }

    /* Fifteen power-of-two FFTs over the intermediate buffer */
    for (col = 0; col < 15; col++)
        s->ptwo_fft.fft_calc(&s->ptwo_fft, s->tmp + n_ptwo*col);

    /* Post-reindex and twiddle, walking outwards from the middle */
    for (lo = mid - 1, hi = mid; lo >= 0; lo--, hi++) {
        const int src_hi = s->pfa_postreindex[hi];
        const int src_lo = s->pfa_postreindex[lo];

        CMUL(out[lo].re, out[hi].im, s->tmp[src_lo].im, s->tmp[src_lo].re,
             s->twiddle_exptab[lo].im, s->twiddle_exptab[lo].re);
        CMUL(out[hi].re, out[lo].im, s->tmp[src_hi].im, s->tmp[src_hi].re,
             s->twiddle_exptab[hi].im, s->twiddle_exptab[hi].re);
    }
}
/**
 * Core of the inverse MDCT: pre-rotation, complex FFT and post-rotation.
 * The N/4 rotated complex values are left in tmp (N = 2^nbits).
 */
static void imdct_c(MDCTContext *s, const FFTSample *input, FFTSample *tmp)
{
    const int size = 1 << s->nbits;
    const int half = size >> 1;
    const int quarter = size >> 2;
    const uint16_t *perm = s->fft.revtab;
    const FFTSample *cos_tab = s->tcos;
    const FFTSample *sin_tab = s->tsin;
    FFTComplex *spec = (FFTComplex *)tmp;
    const FFTSample *asc = input;
    const FFTSample *desc = input + half - 1;
    int idx, slot;

    /* pre rotation: results are stored in bit-reversed order for the FFT */
    for (idx = 0; idx < quarter; idx++, asc += 2, desc -= 2) {
        slot = perm[idx];
        CMUL(spec[slot].re, spec[slot].im, *desc, *asc,
             cos_tab[idx], sin_tab[idx]);
    }

    ff_fft_calc(&s->fft, spec);

    /* post rotation, in place */
    for (idx = 0; idx < quarter; idx++) {
        CMUL(spec[idx].re, spec[idx].im, spec[idx].re, spec[idx].im,
             cos_tab[idx], sin_tab[idx]);
    }
}
/*
 * Explicit (non-FFT) spatial Fourier transform.
 *
 * For every momentum mom[p] (components taken from mom[p][1..3]) and
 * every i in [0, n) this accumulates
 *
 *     out[i][p] = sum_lv exp(-i*phi(lv, p)) * in[i][lv]
 *
 * over the lvol3d = l_vol / l_dim[0] local spatial sites, where phi is
 * 2*pi * sum_d (global coordinate_d * mom_d / g_dim[d]). The local sums
 * are then added over all ranks of MPI_COMM_WORLD, so every rank ends
 * up with the complete result.
 *
 *   out  : n arrays of nmom complex numbers, overwritten
 *   in   : n arrays of lvol3d complex numbers
 *   mom  : nmom momentum vectors
 */
void
qpb_ft(qpb_complex **out, qpb_complex **in, int n, int mom[][4], int nmom)
{
  int lvol = problem_params.l_vol;
  int lt = problem_params.l_dim[0];
  int lvol3d = lvol/lt;
  double pi = atan(1.0)*4.0;
  unsigned short int *gdim = problem_params.g_dim;
  unsigned short int *ldim = problem_params.l_dim;
  unsigned short int *coords = problem_params.coords;

  for(int p=0; p<nmom; p++)
    {
      for(int i=0; i<n; i++)
	out[i][p] = (qpb_complex){0., 0.};

      for(int lv=0; lv<lvol3d; lv++)
	{
	  int lx = X_INDEX(lv, ldim);
	  int ly = Y_INDEX(lv, ldim);
	  int lz = Z_INDEX(lv, ldim);
	  /* global coordinates of this local site */
	  int x = coords[3]*ldim[3]+lx;
	  int y = coords[2]*ldim[2]+ly;
	  int z = coords[1]*ldim[1]+lz;
	  qpb_double phi = (double)((double)x*mom[p][3]/gdim[3] +
				    (double)y*mom[p][2]/gdim[2] +
				    (double)z*mom[p][1]/gdim[1]);
	  phi = phi*2*pi;
	  qpb_complex phase = {cos(phi),-sin(phi)};
	  /* each iteration touches only its own out[i][p] */
#ifdef OPENMP
#	pragma omp parallel for
#endif
	  for(int i=0; i<n; i++)
	    {
	      qpb_complex c;
	      c = CMUL(phase, in[i][lv]);
	      out[i][p].re += c.re;
	      out[i][p].im += c.im;
	    }
	}
    }

  /*
   * Do this outside of OpenMP.
   *
   * A single all-reduce on a (re, im) pair replaces the former two
   * MPI_Reduce + two MPI_Bcast calls per element; the result on every
   * rank is the global sum, as before.
   */
  for(int p=0; p<nmom; p++)
    for(int i=0; i<n; i++)
      {
	double local[2] = {out[i][p].re, out[i][p].im};
	double global[2] = {0., 0.};
	MPI_Allreduce(local, global, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
	out[i][p].re = global[0];
	out[i][p].im = global[1];
      }

  return;
}
/**
 * Half inverse MDCT with output scaling.
 *
 * Reads s->len4 sample pairs from src (element spacing `stride`), runs the
 * 15 * 2^N FFT into dst (accessed as FFTComplex) and applies the post
 * twiddle together with `scale` in place.
 */
static void celt_imdct_half(CeltIMDCTContext *s, float *dst, const float *src,
                            ptrdiff_t stride, float scale)
{
    FFTComplex *spec = (FFTComplex *)dst;
    const int mid = s->len4 / 2;
    const float *asc = src;
    const float *desc = src + (s->len2 - 1) * stride;
    int n, lo, hi;

    /* pre-twiddle into the scratch buffer */
    for (n = 0; n < s->len4; n++) {
        FFTComplex pair = { *desc, *asc };

        CMUL(s->tmp[n], pair, s->twiddle_exptab[n]);
        asc  += 2 * stride;
        desc -= 2 * stride;
    }

    fft_calc(s, spec, s->tmp, s->fft_n, 1);

    /* post-twiddle: both products are formed before either bin is stored */
    for (lo = mid - 1, hi = mid; lo >= 0; lo--, hi++) {
        float re_lo, im_lo, re_hi, im_hi;

        CMUL3(re_lo, im_hi, spec[lo].im, spec[lo].re,
              s->twiddle_exptab[lo].im, s->twiddle_exptab[lo].re);
        CMUL3(re_hi, im_lo, spec[hi].im, spec[hi].re,
              s->twiddle_exptab[hi].im, s->twiddle_exptab[hi].re);

        spec[lo].re = scale * re_lo;
        spec[lo].im = scale * im_lo;
        spec[hi].re = scale * re_hi;
        spec[hi].im = scale * im_hi;
    }
}
/*
 * dest <- (*phase) * src, for each of the 4 x 3 complex components of a
 * wilson_vector.
 *
 * Both build variants read the phase once and fully read a source component
 * before storing the corresponding destination component, so the call gives
 * the same result when dest aliases src.
 */
void c_scalar_mult_wvec(wilson_vector *src, complex *phase, wilson_vector *dest)
{
#ifndef FAST
  register int i,j;
  complex t;
  complex ph = *phase;	/* snapshot, as the FAST variant does */

  for(i=0;i<4;i++){
    for(j=0;j<3;j++){
      /* Multiply into a temporary: a CMUL that stores its real part
         first would otherwise corrupt the input when dest == src. */
      CMUL( src->d[i].c[j], ph, t );
      dest->d[i].c[j] = t;
    }
  }

#else
  register int i,j;

#ifdef NATIVEDOUBLE
  register double sr,si,br,bi;
#else
  register float sr,si,br,bi;
#endif

  sr = (*phase).real;
  si = (*phase).imag;

  for(i=0;i<4;i++){
    for(j=0;j<3;j++){
      br=src->d[i].c[j].real;
      bi=src->d[i].c[j].imag;

      dest->d[i].c[j].real = sr*br - si*bi;
      dest->d[i].c[j].imag = sr*bi + si*br;
    }
  }
#endif
}
/*
 * FFT of the length 15 * (2^N)
 *
 * Recursive radix-2 split: the two half-size transforms are computed from
 * the even/odd input samples (stride doubled), then merged with the
 * twiddles in s->exptab[N]. N == 0 is the plain 15-point transform.
 */
static void fft_calc(CeltIMDCTContext *s, FFTComplex *out,
                     const FFTComplex *in, int N, ptrdiff_t stride)
{
    const FFTComplex *tw;
    FFTComplex *upper;
    int half, k;

    if (!N) {
        fft15(s, out, in, stride);
        return;
    }

    tw    = s->exptab[N];
    half  = 15 * (1 << (N - 1));
    upper = out + half;

    fft_calc(s, out,   in,          N - 1, stride * 2);
    fft_calc(s, upper, in + stride, N - 1, stride * 2);

    /* butterfly merge of the two halves */
    for (k = 0; k < half; k++) {
        FFTComplex rot;

        CMUL(rot, upper[k], tw[k]);
        upper[k].re = out[k].re - rot.re;
        upper[k].im = out[k].im - rot.im;
        out[k].re  += rot.re;
        out[k].im  += rot.im;
    }
}
/*
 * Applies the block-diagonal factor to the right-hand side, in place.
 *
 * For each j in [0, n) the code reads 8 doubles of D starting at D[8*j] and
 * updates 4 doubles of X starting at X[4*j]:
 *     X[4j+0..1] <- D[8j+0..1]*X[4j+0..1] + D[8j+4..5]*X[4j+2..3]
 *     X[4j+2..3] <- D[8j+2..3]*X[4j+0..1] + D[8j+6..7]*X[4j+2..3]
 * NOTE(review): this reads like a 2x2 complex block (interleaved re/im,
 * column-major) times a complex 2-vector, assuming CMUL(a,b,c) is c = a*b
 * and CADDPROD(c,a,b) is c += a*b -- confirm against the macro definitions.
 * Per the original note, D actually holds inv(D).
 */
void LDL_dsolve(
	int n,		/* number of blocks, n >= 0 */
	double X[],	/* right-hand-side on input, soln. on output */
	double D[]	/* input, not modified */
){
	int blk;

	for (blk = 0; blk < n; blk++){
		double *xj = &X[4*blk];
		double *dj = &D[8*blk];
		double top[2], bot[2];

		/* both results are formed before X is overwritten */
		CMUL(&dj[0], &xj[0], top);
		CADDPROD(top, &dj[4], &xj[2]);
		CMUL(&dj[2], &xj[0], bot);
		CADDPROD(bot, &dj[6], &xj[2]);

		CSET(&xj[0], top);
		CSET(&xj[2], bot);
	}
}
/**
 * Compute inverse MDCT of size N = 2^nbits
 * @param output N samples
 * @param input N/2 samples
 * @param tmp N/2 samples, used as scratch for N/4 complex values
 */
void ff_imdct_calc(MDCTContext *s, FFTSample *output,
                   const FFTSample *input, FFTSample *tmp)
{
    const int size = 1 << s->nbits;
    const int half = size >> 1;
    const int quarter = size >> 2;
    const int eighth = size >> 3;
    const uint16_t *perm = s->fft.revtab;
    const FFTSample *cos_tab = s->tcos;
    const FFTSample *sin_tab = s->tsin;
    FFTComplex *spec = (FFTComplex *)tmp;
    const FFTSample *asc = input;
    const FFTSample *desc = input + half - 1;
    int m, slot;

    /* pre rotation, stored bit-reversed for the FFT */
    for (m = 0; m < quarter; m++, asc += 2, desc -= 2) {
        slot = perm[m];
        CMUL(spec[slot].re, spec[slot].im, *desc, *asc,
             cos_tab[m], sin_tab[m]);
    }

    fft_calc(&s->fft, spec);

    /* post rotation, in place */
    for (m = 0; m < quarter; m++) {
        CMUL(spec[m].re, spec[m].im, spec[m].re, spec[m].im,
             cos_tab[m], sin_tab[m]);
    }

    /* reordering: each rotated value is written to two mirrored positions */
    for (m = 0; m < eighth; m++) {
        const int up = eighth + m;        /* walks upwards from the middle   */
        const int down = eighth - 1 - m;  /* walks downwards from the middle */
        const int m2 = 2 * m;

        output[m2]                = -spec[up].im;
        output[half - 1 - m2]     =  spec[up].im;

        output[m2 + 1]            =  spec[down].re;
        output[half - 1 - m2 - 1] = -spec[down].re;

        output[half + m2]         = -spec[up].re;
        output[size - 1 - m2]     = -spec[up].re;

        output[half + m2 + 1]     =  spec[down].im;
        output[size - 2 - m2]     =  spec[down].im;
    }
}
/**
 * Compute MDCT of size N = 2^nbits
 * @param input N samples
 * @param out N/2 samples; also used as the complex work buffer
 */
void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input)
{
    const int size = 1 << s->mdct_bits;
    const int half = size >> 1;
    const int quarter = size >> 2;
    const int eighth = size >> 3;
    const int three_q = 3 * quarter;
    const uint16_t *perm = s->revtab;
    const FFTSample *cos_tab = s->tcos;
    const FFTSample *sin_tab = s->tsin;
    FFTComplex *work = (FFTComplex *)out;
    FFTDouble fold_re, fold_im;
    int m, slot, lo, hi;

    /* pre rotation: fold the input and store it bit-reversed */
    for (m = 0; m < eighth; m++) {
        fold_re = RSCALE(-input[2*m+three_q] - input[three_q-1-2*m]);
        fold_im = RSCALE(-input[quarter+2*m] + input[quarter-1-2*m]);
        slot = perm[m];
        CMUL(work[slot].re, work[slot].im, fold_re, fold_im,
             -cos_tab[m], sin_tab[m]);

        fold_re = RSCALE( input[2*m] - input[half-1-2*m]);
        fold_im = RSCALE(-input[half+2*m] - input[size-1-2*m]);
        slot = perm[eighth + m];
        CMUL(work[slot].re, work[slot].im, fold_re, fold_im,
             -cos_tab[eighth + m], sin_tab[eighth + m]);
    }

    s->fft_calc(s, work);

    /* post rotation: both products are formed before either bin is stored */
    for (lo = eighth - 1, hi = eighth; lo >= 0; lo--, hi++) {
        FFTSample re_lo, im_lo, re_hi, im_hi;

        CMUL(im_hi, re_lo, work[lo].re, work[lo].im, -sin_tab[lo], -cos_tab[lo]);
        CMUL(im_lo, re_hi, work[hi].re, work[hi].im, -sin_tab[hi], -cos_tab[hi]);
        work[lo].re = re_lo;
        work[lo].im = im_lo;
        work[hi].re = re_hi;
        work[hi].im = im_hi;
    }
}
/**
 * Forward MDCT built on a 15 x 2^n FFT.
 *
 * src is read contiguously; the output is written to dst as interleaved
 * (re, im) pairs with element spacing `stride`.
 */
static void mdct15(MDCT15Context *s, float *dst, const float *src,
                   ptrdiff_t stride)
{
    const int quarter = s->len4;
    const int three_q = quarter * 3;
    const int mid = quarter >> 1;
    const int n_ptwo = 1 << s->ptwo_fft.nbits;
    FFTComplex col_in[15];
    int blk, col, lo, hi;

    /* Folding and pre-reindexing, one 15-point FFT per pass */
    for (blk = 0; blk < n_ptwo; blk++) {
        for (col = 0; col < 15; col++) {
            const int k = s->pfa_prereindex[blk*15 + col];
            float fold_re, fold_im;

            if (k < mid) {
                fold_re = -src[2*k+three_q] - src[three_q-1-2*k];
                fold_im = -src[quarter+2*k] + src[quarter-1-2*k];
            } else {
                fold_re =  src[2*k-quarter] - src[1*three_q-1-2*k];
                fold_im = -src[2*k+quarter] - src[5*quarter-1-2*k];
            }
            CMUL(col_in[col].re, col_in[col].im, fold_re, fold_im,
                 s->twiddle_exptab[k].re, -s->twiddle_exptab[k].im);
        }
        s->fft15(s->tmp + s->ptwo_fft.revtab[blk], col_in, s->exptab, n_ptwo);
    }

    /* Fifteen power-of-two FFTs over the intermediate buffer */
    for (col = 0; col < 15; col++)
        s->ptwo_fft.fft_calc(&s->ptwo_fft, s->tmp + n_ptwo*col);

    /* Post-reindex, twiddle and scatter, walking outwards from the middle */
    for (lo = mid - 1, hi = mid; lo >= 0; lo--, hi++) {
        const int src_hi = s->pfa_postreindex[hi];
        const int src_lo = s->pfa_postreindex[lo];
        float re_lo, im_lo, re_hi, im_hi;

        CMUL(im_hi, re_lo, s->tmp[src_lo].re, s->tmp[src_lo].im,
             s->twiddle_exptab[lo].im, s->twiddle_exptab[lo].re);
        CMUL(im_lo, re_hi, s->tmp[src_hi].re, s->tmp[src_hi].im,
             s->twiddle_exptab[hi].im, s->twiddle_exptab[hi].re);

        dst[2*lo*stride         ] = re_lo;
        dst[2*lo*stride + stride] = im_lo;
        dst[2*hi*stride         ] = re_hi;
        dst[2*hi*stride + stride] = im_hi;
    }
}
/*
 * c <- a * b : 3x3 complex matrix times 3-component complex vector.
 * Each row sum is built in a local accumulator and stored once.
 */
void mult_su3_mat_vec( su3_matrix *a, su3_vector *b, su3_vector *c ){
    int row, col;
    complex acc, term;

    for(row=0; row<3; row++){
	acc.real = acc.imag = 0.0;
	col = 0;
	while(col < 3){
	    CMUL( a->e[row][col] , b->c[col] , term )
	    CSUM( acc , term );
	    col++;
	}
	c->c[row] = acc;
    }
}
/**
 * Compute MDCT of size N = 2^nbits
 * @param input N samples
 * @param out N/2 samples
 * @param tmp temporary storage of N/2 samples
 */
void ff_mdct_calc(MDCTContext *s, FFTSample *out,
                  const FFTSample *input, FFTSample *tmp)
{
    const int size = 1 << s->nbits;
    const int half = size >> 1;
    const int quarter = size >> 2;
    const int eighth = size >> 3;
    const int three_q = 3 * quarter;
    const uint16_t *perm = s->fft.revtab;
    const FFTSample *cos_tab = s->tcos;
    const FFTSample *sin_tab = s->tsin;
    FFTComplex *work = (FFTComplex *)tmp;
    FFTSample fold_re, fold_im, rot_re, rot_im;
    int m, slot;

    /* pre rotation: fold the input and store it bit-reversed */
    for (m = 0; m < eighth; m++) {
        fold_re = -input[2*m+3*quarter] - input[three_q-1-2*m];
        fold_im = -input[quarter+2*m] + input[quarter-1-2*m];
        slot = perm[m];
        CMUL(work[slot].re, work[slot].im, fold_re, fold_im,
             -cos_tab[m], sin_tab[m]);

        fold_re =  input[2*m] - input[half-1-2*m];
        fold_im = -(input[half+2*m] + input[size-1-2*m]);
        slot = perm[eighth + m];
        CMUL(work[slot].re, work[slot].im, fold_re, fold_im,
             -cos_tab[eighth + m], sin_tab[eighth + m]);
    }

    ff_fft_calc(&s->fft, work);

    /* post rotation: results interleave from both ends of out */
    for (m = 0; m < quarter; m++) {
        fold_re = work[m].re;
        fold_im = work[m].im;
        CMUL(rot_re, rot_im, fold_re, fold_im, -sin_tab[m], -cos_tab[m]);
        out[2*m] = rot_im;
        out[half-1-2*m] = rot_re;
    }
}
/*
 * 15-point FFT: three 5-point transforms over every third input sample,
 * combined with the twiddles in s->exptab[0].
 */
static void fft15(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in,
                  ptrdiff_t stride)
{
    /* offset added to 2*k for the second twiddle of output group 0, 1, 2 */
    static const int second_tw[3] = { 0, 10, 5 };
    const FFTComplex *tw = s->exptab[0];
    FFTComplex sub0[5];
    FFTComplex sub1[5];
    FFTComplex sub2[5];
    int k, grp;

    fft5(sub0, in,              stride * 3);
    fft5(sub1, in +     stride, stride * 3);
    fft5(sub2, in + 2 * stride, stride * 3);

    for (k = 0; k < 5; k++) {
        /* outputs k, k + 5 and k + 10, in that order */
        for (grp = 0; grp < 3; grp++) {
            const int bin = k + 5 * grp;
            FFTComplex p1, p2;

            CMUL(p1, sub1[k], tw[bin]);
            CMUL(p2, sub2[k], tw[2 * k + second_tw[grp]]);

            out[bin].re = sub0[k].re + p1.re + p2.re;
            out[bin].im = sub0[k].im + p1.im + p2.im;
        }
    }
}
/******************************************************************************
FUNCTION:
   check_unitarity

   Sums e * conj(e) over the first column of *psub and prints the sum
   (real and imaginary part) when it differs from 1 + 0i by more than a
   fixed tolerance of 0.0001 in either part.
******************************************************************************/
void check_unitarity(su2_matrix *psub)
{
  const Real tol = 0.0001;
  complex norm_sq;
  complex cc;
  complex term;
  int row;

  norm_sq.real = 0.0;
  norm_sq.imag = 0.0;

  /* first column only: entries [0][0] and [1][0] */
  for (row = 0; row < 2; row++)
    {
      CONJG(psub->e[row][0], cc);
      CMUL(psub->e[row][0], cc, term);
      CSUM(norm_sq, term);
    }

  if (norm_sq.real < 1.0 - tol || norm_sq.real > 1.0 + tol
      || norm_sq.imag < -tol || norm_sq.imag > tol)
    {
      printf("%g\t%g\n", norm_sq.real, norm_sq.imag);
    }
}
/**
 * Compute the middle half of the inverse MDCT of size N = 2^nbits,
 * thus excluding the parts that can be derived by symmetry
 * @param output N/2 samples; also used as the complex work buffer
 * @param input N/2 samples
 */
void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input)
{
    const int size = 1 << s->mdct_bits;
    const int half = size >> 1;
    const int quarter = size >> 2;
    const int eighth = size >> 3;
    const uint16_t *perm = s->revtab;
    const FFTSample *cos_tab = s->tcos;
    const FFTSample *sin_tab = s->tsin;
    FFTComplex *spec = (FFTComplex *)output;
    const FFTSample *asc = input;
    const FFTSample *desc = input + half - 1;
    int m, slot, lo, hi;

    /* pre rotation, stored bit-reversed for the FFT */
    for (m = 0; m < quarter; m++, asc += 2, desc -= 2) {
        slot = perm[m];
        CMUL(spec[slot].re, spec[slot].im, *desc, *asc,
             cos_tab[m], sin_tab[m]);
    }

    s->fft_calc(s, spec);

    /* post rotation + reordering, walking outwards from the middle;
     * both products are formed before either bin is stored */
    for (lo = eighth - 1, hi = eighth; lo >= 0; lo--, hi++) {
        FFTSample re_lo, im_lo, re_hi, im_hi;

        CMUL(re_lo, im_hi, spec[lo].im, spec[lo].re, sin_tab[lo], cos_tab[lo]);
        CMUL(re_hi, im_lo, spec[hi].im, spec[hi].re, sin_tab[hi], cos_tab[hi]);
        spec[lo].re = re_lo;
        spec[lo].im = im_lo;
        spec[hi].re = re_hi;
        spec[hi].im = im_hi;
    }
}
/*
 * c <- a * b for 3x3 complex matrices.
 * Every element of c is accumulated locally and stored once.
 */
void llfat_mult_su3_nn( su3_matrix *a, su3_matrix *b, su3_matrix *c )
{
  int row, col, mid;
  typeof(a->e[0][0]) acc, term;

  for(row=0; row<3; row++) {
    for(col=0; col<3; col++) {
      acc.real = acc.imag = 0.0;
      mid = 0;
      while(mid < 3) {
	CMUL( a->e[row][mid] , b->e[mid][col] , term );
	CSUM( acc , term );
	mid++;
      }
      c->e[row][col] = acc;
    }
  }
}
/* Computes the real trace of the su3 matrix product: ReTr(A*B).
   Only the nine products a[i][j]*b[j][i] that land on the diagonal of
   A*B are formed; their real parts are summed. */
Real real_trace_nn( su3_matrix *a, su3_matrix *b )
{
  int row, col;
  complex prod;
  Real total = 0.0;

  for( row=0; row<3; row++ ) {
    for( col=0; col<3; col++ ) {
      CMUL( a->e[row][col], b->e[col][row], prod );
      total += prod.real;
    }
  }

  return total;
}
void MDCT::calcHalfIMDCT(float *output, const float *input) { Complex *z = (Complex *) output; const int size2 = _size >> 1; const int size4 = _size >> 2; const int size8 = _size >> 3; const uint16 *revTab = _fft->getRevTab(); // Pre rotation const float *in1 = input; const float *in2 = input + size2 - 1; for(int k = 0; k < size4; k++) { const int j = revTab[k]; CMUL(z[j].re, z[j].im, *in2, *in1, _tCos[k], _tSin[k]); in1 += 2; in2 -= 2; } _fft->calc(z); // Post rotation + reordering for(int k = 0; k < size8; k++) { float r0, i0, r1, i1; CMUL(r0, i1, z[size8-k-1].im, z[size8-k-1].re, _tSin[size8-k-1], _tCos[size8-k-1]); CMUL(r1, i0, z[size8+k ].im, z[size8+k ].re, _tSin[size8+k ], _tCos[size8+k ]); z[size8 - k - 1].re = r0; z[size8 - k - 1].im = i0; z[size8 + k ].re = r1; z[size8 + k ].im = i1; } }
/******************************************************************************
FUNCTION:
   mult_su2

   *pprod <- (*pmat1) * (*pmat2) for 2x2 complex matrices.
   Each product element is zeroed and then accumulated directly in *pprod.
******************************************************************************/
void mult_su2(su2_matrix *pprod, su2_matrix *pmat1, su2_matrix *pmat2)
{
  int row;          /* row of the product */
  int col;          /* column of the product */
  complex piece;    /* one term of row times column */

  for (row = 0; row < 2; row++)
    {
      for (col = 0; col < 2; col++)
        {
          pprod->e[row][col].real = 0.0;
          pprod->e[row][col].imag = 0.0;

          /* the two terms of the inner sum, written out */
          CMUL(pmat1->e[row][0], pmat2->e[0][col], piece);
          CSUM(pprod->e[row][col], piece);

          CMUL(pmat1->e[row][1], pmat2->e[1][col], piece);
          CSUM(pprod->e[row][col], piece);
        }
    }
}
// In-place inverse of a complex 2x2 matrix stored as 8 doubles.
// z[0],z[1] is (1,1) entry of matrix
// z[2],z[3] is (2,1) entry of matrix, etc.
//   [a c]
//   [b d]
// NOTE(review): no check for a singular matrix is visible here.
static void invert_c2x2(double z[]){
	double *const m11 = &z[2*0];
	double *const m21 = &z[2*1];
	double *const m12 = &z[2*2];
	double *const m22 = &z[2*3];
	double inv_det[2], scratch[2];
	int e;

	// inv_det = 1 / (ad - bc)
	CMUL(m11, m22, inv_det);
	CMUL(m21, m12, scratch);
	CSUB(inv_det, scratch);
	CINV(inv_det);

	// adjugate: swap the diagonal, negate the off-diagonal
	CSWP(m11, m22);
	CNEG(m21);
	CNEG(m12);

	// scale every entry by inv_det
	for(e = 0; e < 4; e++){
		CMUL(&z[2*e], inv_det, scratch);
		CSET(&z[2*e], scratch);
	}
}
/*
 * Variant of the half inverse MDCT written with vector-float pairs.
 *
 * Structure visible in the code:
 *   1. A pre-rotation loop that consumes the input from both ends (base[0]),
 *      together with the cos/sin tables from both ends (base[1], base[2]),
 *      two entries per step, and stores the results into `output` at byte
 *      offsets revtab[..]*8.
 *   2. s->fft_calc() on `output`, viewed as FFTComplex.
 *   3. A post-rotation loop that starts at output+n4 and the table middle
 *      (base[3..5]) and works outwards in both directions, rewriting
 *      `output` in place.
 *
 * The CMUL used here takes eight arguments, i.e. it is not the six-argument
 * scalar macro used by the plain C implementations.
 *
 * NOTE(review): psq_lu/psq_l/psq_stx/psq_st/psq_stu and paired_merge* are not
 * defined in view; they look like paired-single load/store intrinsics whose
 * "u" forms also advance the base[][] pointer they are given. The statement
 * order inside both loops depends on that -- confirm before reordering.
 * NOTE(review): base[][] is declared FFTSample* but is initialised from
 * const-qualified pointers (input, tcos, tsin).
 */
void ff_imdct_half_paired(FFTContext *s, FFTSample *output, const FFTSample *input) { int n = 1 << s->mdct_bits; int n2 = n >> 1; int n4 = n >> 2; int n8 = n >> 3; const uint16_t *revtab = s->revtab; const FFTSample *tcos = s->tcos; const FFTSample *tsin = s->tsin; vector float pair[4], sub[2], add[2]; vector float result, cos, sin; FFTSample *base[6][2] = {{input-2,input+n2},{tcos-2,tcos+n4},{tsin-2,tsin+n4}, {output+n4,output+n4-2},{tcos+n8,tcos+n8-2},{tsin+n8,tsin+n8-2}}; int k, j; for (k=0, j=n4-2; k<n8; k+=2, j-=2) { pair[0] = psq_lu(8,base[0][0],0,0); pair[1] = psq_lu(8,base[0][0],0,0); pair[2] = psq_lu(-8,base[0][1],0,0); pair[3] = psq_lu(-8,base[0][1],0,0); cos = psq_lu(8,base[1][0],0,0); sin = psq_lu(8,base[2][0],0,0); CMUL(sub[0],add[0],pair[0],pair[1],pair[2],pair[3],sin,cos); cos = psq_lu(-8,base[1][1],0,0); sin = psq_lu(-8,base[2][1],0,0); CMUL(sub[1],add[1],pair[3],pair[2],pair[1],pair[0],sin,cos); result = paired_merge00(sub[0], add[0]); psq_stx(result,revtab[k]*8,output,0,0); result = paired_merge11(sub[0], add[0]); psq_stx(result,revtab[k+1]*8,output,0,0); result = paired_merge00(sub[1], add[1]); psq_stx(result,revtab[j]*8,output,0,0); result = paired_merge11(sub[1], add[1]); psq_stx(result,revtab[j+1]*8,output,0,0); } s->fft_calc(s, (FFTComplex *)output); for (k=0; k<n8; k+=2) { pair[0] = psq_lu(-8,base[3][0],0,0); pair[1] = psq_l(-8,base[3][0],0,0); cos = psq_lu(-8,base[4][0],0,0); sin = psq_lu(-8,base[5][0],0,0); CMUL(sub[0],add[1],pair[1],pair[0],pair[1],pair[0],cos,sin); pair[0] = psq_lu(8,base[3][1],0,0); pair[1] = psq_l(8,base[3][1],0,0); cos = psq_lu(8,base[4][1],0,0); sin = psq_lu(8,base[5][1],0,0); CMUL(sub[1],add[0],pair[0],pair[1],pair[0],pair[1],cos,sin); result = paired_merge10(sub[0], add[0]); psq_st(result,0,base[3][0],0,0); result = paired_merge01(sub[0], add[0]); psq_stu(result,-8,base[3][0],0,0); result = paired_merge01(sub[1], add[1]); psq_st(result,0,base[3][1],0,0); result = paired_merge10(sub[1], add[1]); 
psq_stu(result,8,base[3][1],0,0); } }
void meson_cont_mom(complex prop[], field_offset src1,field_offset src2, int base_pt, int q_stride, int op_stride, gamma_corr gamma_table[], int no_gamma_corr) { register int i; register site *s; double theta ; double factx = 2.0*PI/(1.0*nx) ; double facty = 2.0*PI/(1.0*ny) ; double factz = 2.0*PI/(1.0*nz) ; Real px,py,pz; complex phase_fact ; int my_t; int cf, sf, si; int i_gamma_corr,q_pt,prop_pt; complex g1,g2; spin_wilson_vector localmat,localmat2; /* temporary storage */ spin_wilson_vector quark; /* temporary storage for quark */ spin_wilson_vector antiquark; /* temporary storage for antiquark */ FORALLSITES(i,s) { my_t = s->t; /* copy src2 into quark */ for(si=0;si<4;si++) for(sf=0;sf<4;sf++) for(cf=0;cf<3;cf++) { quark.d[si].d[sf].c[cf] = ((spin_wilson_vector *)F_PT(s,src2))->d[si].d[sf].c[cf]; } /* next, construct antiquark from src1 */ /*first, dirac multiplication by the source gamma matrices (on left) */ /* antiquark = c.c. of quark propagator */ for(si=0;si<4;si++) for(sf=0;sf<4;sf++) for(cf=0;cf<3;cf++) { CONJG(((spin_wilson_vector *)F_PT(s,src1))->d[si].d[sf].c[cf], antiquark.d[sf].d[si].c[cf]); } /* left multiply antiquark by source gamma matrices, beginning with gamma_5 for quark -> antiquark */ mult_sw_by_gamma_l( &antiquark, &localmat, G5); /* right dirac multiplication by gamma-5 (finishing up antiquark) */ mult_sw_by_gamma_r( &localmat, &antiquark, G5); /* Run through the table of source-sink gamma matrices */ for(i_gamma_corr=0; i_gamma_corr<no_gamma_corr; i_gamma_corr++) { /* left multiply by the particular source dirac matrices */ /* result in localmat2 */ mult_sw_by_gamma_l( &antiquark, &localmat, gamma_table[i_gamma_corr].gin); mult_sw_by_gamma_r( &localmat, &localmat2, gamma_table[i_gamma_corr].gout); /* Run through all sink momenta */ for(q_pt=0; q_pt<no_q_values; q_pt++) { px = q_momstore[q_pt][0]; py = q_momstore[q_pt][1]; pz = q_momstore[q_pt][2]; theta = factx*(s->x)*px + facty*(s->y)*py + factz*(s->z)*pz; phase_fact = cmplx((Real) 
cos(theta) , (Real) sin(theta)) ; prop_pt = my_t + base_pt + q_pt * q_stride + i_gamma_corr * op_stride; /* trace over propagators */ for(si=0;si<4;si++) for(sf=0;sf<4;sf++) for(cf=0;cf<3;cf++) { g1 = localmat2.d[si].d[sf].c[cf]; CMUL( quark.d[sf].d[si].c[cf] , phase_fact, g2); prop[prop_pt ].real += (g1.real*g2.real - g1.imag*g2.imag); prop[prop_pt ].imag += (g1.real*g2.imag + g1.imag*g2.real); } } } } /**** end of the loop over lattice sites ******/
/*
 * do measurements: load density, ploop, etc. and phases onto lattice
 *
 * Steps, as visible in the code:
 *  - ploop_less_slice(nt-1, EVEN/ODD) is called first; per the original
 *    note this makes T (the timelike P-loop) from s->ploop_t.
 *  - On every site with t == nt-1: tempmat1 = link[TUP] * ploop_t. Its
 *    trace is accumulated into plp; then C is added to the real part of
 *    the diagonal and the determinant zdet is taken.
 *    numer(C, ztr, conj(ztr)) / zdet is accumulated into density.
 *    The four ratios C^3/zdet, C^2*ztr/zdet, C*conj(ztr)/zdet and 1/zdet
 *    are stored in s->qprob[0..3] and accumulated into p[0..3].
 *    carg(&zdet) is summed into phase.
 *  - After the global sums, density, p[] and plp are divided by nx*ny*nz
 *    and node 0 prints one "ZMES" and one "PMES" line.
 *  - With PPCORR defined, for each of the directions XUP..ZUP the qprob[]
 *    of the neighbouring site is gathered, the products
 *    qprob[j](here) * qprob[k](neighbour) are summed into pp[j][k],
 *    divided by 3*nx*ny*nz and printed by node 0 as "PP" lines
 *    (dx,dy,dz are fixed to 1,0,0 in the output).
 *
 * NOTE(review): dd, TC and np[] are declared but never used; the trace of
 * tempmat1 is computed twice (zplp and ztr); dx, dy, dz, dir and tag are
 * only used when PPCORR is defined.
 */
void measure() { register int i,j,k, c; register site *s; int dx,dy,dz; /* separation for correlated observables */ int dir; /* direction of separation */ msg_tag *tag; register complex cc,dd; /*scratch*/ complex ztr, zcof, znum, zdet, TC, zd, density, zphase; complex p[4]; /* probabilities of n quarks at a site */ complex np[4]; /* probabilities at neighbor site */ complex pp[4][4]; /* joint probabilities of n here and m there */ complex zplp, plp; Real locphase, phase; /* First make T (= timelike P-loop) from s->ploop_t T stored in s->tempmat1 */ ploop_less_slice(nt-1,EVEN); ploop_less_slice(nt-1,ODD); phase = 0.; density = plp = cmplx(0.0, 0.0); for(j=0;j<4;j++){ p[j]=cmplx(0.0,0.0); for(k=0;k<4;k++)pp[j][k]=cmplx(0.0,0.0); } FORALLSITES(i,s) { if(s->t != nt-1) continue; mult_su3_nn(&(s->link[TUP]), &(s->ploop_t), &(s->tempmat1)); zplp = trace_su3(&(s->tempmat1)); CSUM(plp, zplp); ztr = trace_su3(&(s->tempmat1)); CONJG(ztr, zcof); for(c=0; c<3; ++c) s->tempmat1.e[c][c].real += C; zdet = det_su3(&(s->tempmat1)); znum = numer(C, ztr, zcof); CDIV(znum, zdet, zd); CSUM(density, zd); /* store n_quark probabilities at this site in lattice variable qprob[], accumulate sum over lattice in p[] */ cc = cmplx(C*C*C,0.0); CDIV(cc,zdet,s->qprob[0]); CSUM(p[0],s->qprob[0]); CMULREAL(ztr,C*C,cc); CDIV(cc,zdet,s->qprob[1]); CSUM(p[1],s->qprob[1]); CMULREAL(zcof,C,cc); CDIV(cc,zdet,s->qprob[2]); CSUM(p[2],s->qprob[2]); cc = cmplx(1.0,0.0); CDIV(cc,zdet,s->qprob[3]); CSUM(p[3],s->qprob[3]); locphase = carg(&zdet); phase += locphase; } g_floatsum( &phase ); g_complexsum( &density ); g_complexsum( &p[0] ); g_complexsum( &p[1] ); g_complexsum( &p[2] ); g_complexsum( &p[3] ); g_complexsum( &plp ); CDIVREAL(density,(Real)(nx*ny*nz),density); CDIVREAL(p[0],(Real)(nx*ny*nz),p[0]); CDIVREAL(p[1],(Real)(nx*ny*nz),p[1]); CDIVREAL(p[2],(Real)(nx*ny*nz),p[2]); CDIVREAL(p[3],(Real)(nx*ny*nz),p[3]); 
CDIVREAL(plp,(Real)(nx*ny*nz),plp); zphase = ce_itheta(phase); if(this_node == 0) { printf("ZMES\t%e\t%e\t%e\t%e\t%e\t%e\n", zphase.real, zphase.imag, density.real, density.imag, plp.real, plp.imag); printf("PMES\t%e\t%e\t%e\t%e\t%e\t%e\t%e\t%e\n", p[0].real, p[0].imag, p[1].real, p[1].imag, p[2].real, p[2].imag, p[3].real, p[3].imag ); } #ifdef PPCORR dx=1; dy=0; dz=0; /* Temporary - right now we just do nearest neighbor */ for(dir=XUP;dir<=ZUP;dir++){ tag = start_gather_site( F_OFFSET(qprob[0]), 4*sizeof(complex), dir, EVENANDODD, gen_pt[0] ); wait_gather(tag); FORALLSITES(i,s)if(s->t==nt-1){ for(j=0;j<4;j++)for(k=0;k<4;k++){ CMUL( (s->qprob)[j],((complex *)gen_pt[0][i])[k],cc); CSUM(pp[j][k],cc); } } cleanup_gather(tag); } /* density correlation format: PP dx dy dz n1 n2 real imag */ for(j=0;j<4;j++)for(k=0;k<4;k++){ g_complexsum( &pp[j][k] ); CDIVREAL(pp[j][k],(Real)(3*nx*ny*nz),pp[j][k]); if(this_node==0) printf("PP %d %d %d %d %d %e %e\n",dx,dy,dz,j,k, pp[j][k].real,pp[j][k].imag); } #endif /*PPCORR*/ }
void setup_lambda() { int i, j, k, l, count; complex inv_sqrt = cmplx(1.0 / sqrt(2.0), 0.0); complex i_inv_sqrt = cmplx(0.0, 1.0 / sqrt(2.0)); #ifdef DEBUG_CHECK int a; complex trace, tt; node0_printf("Computing generators for U(N)\n"); #endif // Make sure Lambda matrices are initialized for (i = 0; i < DIMF; i++) clear_mat(&(Lambda[i])); // N * (N - 1) off-diagonal SU(N) generators // (T^{ij, +})_{kl} = i * (de_{ki} de_{lj} + de_{kj} de_{li}) / sqrt(2) // (T^{ij, -})_{kl} = (de_{ki} de_{lj} - de_{kj} de_{ki}) / sqrt(2) // Sign in second chosen to match previous values count = 0; for (i = 0; i < NCOL; i++) { for (j = i + 1; j < NCOL; j++) { for (k = 0; k < NCOL; k++) { for (l = 0; l < NCOL; l++) { if (k == i && l == j) { CSUM(Lambda[count].e[k][l], i_inv_sqrt); CSUM(Lambda[count + 1].e[k][l], inv_sqrt); } else if (k == j && l == i) { CSUM(Lambda[count].e[k][l], i_inv_sqrt); CDIF(Lambda[count + 1].e[k][l], inv_sqrt); } } } count += 2; } } if (count != NCOL * (NCOL - 1)) { node0_printf("ERROR: Wrong number of off-diagonal generators, "); node0_printf("%d vs. 
%d\n", count, NCOL * (NCOL - 1)); terminate(1); } // N - 1 diagonal SU(N) generators // T^k = i * diag(1, 1, ..., -k, 0, ..., 0) / sqrt(k * (k + 1)) for (i = 0; i < NCOL - 1; i++) { j = NCOL * (NCOL - 1) + i; // Index after +/- above k = i + 1; i_inv_sqrt = cmplx(0.0, 1.0 / sqrt(k * (k + 1.0))); for (l = 0; l <= k; l++) Lambda[j].e[l][l] = i_inv_sqrt; CMULREAL(Lambda[j].e[k][k], -1.0 * k, Lambda[j].e[k][k]); } // U(1) generator i * I_N / sqrt(N) if (DIMF == NCOL * NCOL) { // Allow SU(N) compilation for now i_inv_sqrt = cmplx(0.0, sqrt(one_ov_N)); clear_mat(&(Lambda[DIMF - 1])); for (i = 0; i < NCOL; i++) Lambda[DIMF - 1].e[i][i] = i_inv_sqrt; } #ifdef DEBUG_CHECK // Print Lambdas for (i = 0; i < DIMF; i++){ node0_printf("Lambda[%d]\n",i); if (this_node == 0) dumpmat(&(Lambda[i])); } // Test group theory node0_printf("Check group theory "); node0_printf("Sum_a Lambda^a_{kl} Lambda^a_{ij} = -delta_kj delta_il\n"); for (i = 0; i < NCOL; i++) { for (j = 0; j < NCOL; j++) { for (k = 0; k < NCOL; k++) { for (l = 0; l < NCOL; l++) { trace = cmplx(0, 0); for (a = 0; a < DIMF; a++) { CMUL(Lambda[a].e[k][l], Lambda[a].e[i][j], tt); CSUM(trace, tt); } if (cabs_sq(&trace) > IMAG_TOL) node0_printf("Sum_a La^a_{%d%d} La^a_{%d%d} = (%.4g, %.4g)\n", k, j, i, l, trace.real, trace.imag); } } } } #endif // Test orthogonality and compute products of Lambdas for fermion forces #ifdef DEBUG_CHECK for (i = 0; i < DIMF; i++) { for (j = 0; j < DIMF; j++) { mult_nn(&(Lambda[i]), &(Lambda[j]), &tmat); trace = trace(&tmat); if (trace.real * trace.real > IMAG_TOL) node0_printf("Tr[T_%d T_%d] = (%.4g, %.4g)\n", i, j, trace.real, trace.imag); } } #endif }
/*
 * Computes meson 2pt functions for the channels:
 *   1-1, g5-g5, g5-g4g5, g4g5-g5, g4g5-g4g5, g1-g1, g2-g2, g3-g3
 *
 * The function does not return anything. It writes the correlation
 * functions to outfile as ASCII, one line per (t, momentum, channel) in
 * the form "px py pz t re im tag"; only the master process opens and
 * writes the file.
 *
 * Updated for non-zero momentum correlators: the correlator is calculated
 * explicitly (i.e. non-FFT, through qpb_ft()) for all integer momentum
 * vectors with q^2 <= max_q2, sorted by ascending q^2.
 *
 * For each channel the non-zero entries of the Dirac structure are first
 * collected in mu/nu/ku/lu with their coefficient in prod[]; the
 * position-space correlator corr_x[t][lv] is then accumulated over these
 * entries and over both colour indices before being transformed.
 *
 * If heavy == NULL, light is used for both propagators.
 * Exits with QPB_NOT_IMPLEMENTED_ERROR when the t-direction is distributed
 * (problem_params.par_dir[0] == 1); aborts with QPB_FILE_ERROR when the
 * output file cannot be opened.
 */
void qpb_mesons_2pt_corr(qpb_spinor_field *light, qpb_spinor_field *heavy, int max_q2, char outfile[]) { if(heavy == NULL) heavy = light; /* This should never happen. For now the package is built so that only x, y and z are parallelized accross MPI and t along OpenMP */ if(problem_params.par_dir[0] == 1) { error(" %s() not implemented for distributed t-direction, quiting\n", __func__); exit(QPB_NOT_IMPLEMENTED_ERROR); } int lvol = problem_params.l_vol; int lt = problem_params.l_dim[0]; int lvol3d = lvol/lt; qpb_complex **corr_x; qpb_complex **corr_k; qpb_complex **corr[QPB_N_MESON_2PT_CHANNELS]; int N = (NS*NS*NS*NS); qpb_complex prod[N]; int ndirac = 0; int mu[N],nu[N],ku[N],lu[N]; qpb_complex gamma_5x[NS][NS]; qpb_complex gamma_5y[NS][NS]; qpb_complex gamma_5z[NS][NS]; int nmom = 0, nq = (int)sqrt(max_q2)+1; int (*mom)[4]; /* Count momentum vectors <= max_q2 */ for(int z=-nq; z<nq; z++) for(int y=-nq; y<nq; y++) for(int x=-nq; x<nq; x++) { double q2 = x*x+y*y+z*z; if(q2 <= max_q2) nmom++; } mom = qpb_alloc(sizeof(int)*4*nmom); nmom = 0; /* Store momentum vectors <= max_q2 */ for(int z=-nq; z<nq; z++) for(int y=-nq; y<nq; y++) for(int x=-nq; x<nq; x++) { double q2 = x*x+y*y+z*z; if(q2 <= max_q2) { mom[nmom][3] = x; mom[nmom][2] = y; mom[nmom][1] = z; mom[nmom][0] = q2; nmom++; } } /* Sort in ascending q^2 value */ for(int i=0; i<nmom; i++) { int x = mom[i][0]; /* the q^2 value */ int k = i; for(int j=i+1; j<nmom; j++) if(mom[j][0] < x) { k = j; x = mom[j][0]; } int swap[] = {mom[k][0], mom[k][1], mom[k][2], mom[k][3]}; for(int j=0; j<4; j++) mom[k][j] = mom[i][j]; for(int j=0; j<4; j++) mom[i][j] = swap[j]; } corr_x = qpb_alloc(lt * 
sizeof(qpb_complex *)); corr_k = qpb_alloc(lt * sizeof(qpb_complex *)); for(int t=0; t<lt; t++) { corr_x[t] = qpb_alloc(lvol3d * sizeof(qpb_complex)); corr_k[t] = qpb_alloc(nmom * sizeof(qpb_complex)); } for(int ich=0; ich<QPB_N_MESON_2PT_CHANNELS; ich++) { corr[ich] = qpb_alloc(nmom * sizeof(qpb_complex *)); for(int p=0; p<nmom; p++) corr[ich][p] = qpb_alloc(lt * sizeof(qpb_complex)); ndirac = 0; switch(ich) { case S_S: for(int i=0; i<NS; i++) for(int j=0; j<NS; j++) for(int k=0; k<NS; k++) for(int l=0; l<NS; l++) { if(CNORM(CMUL(qpb_gamma_5[i][j],qpb_gamma_5[k][l])) > 0.5 ) { mu[ndirac] = i; nu[ndirac] = j; ku[ndirac] = k; lu[ndirac] = l; prod[ndirac] = CMUL(qpb_gamma_5[i][j],qpb_gamma_5[k][l]); ndirac++; } } break; case G5_G5: for(int i=0; i<NS; i++) for(int j=0; j<NS; j++) for(int k=0; k<NS; k++) for(int l=0; l<NS; l++) { if(i==j && k==l) { mu[ndirac] = i; nu[ndirac] = j; ku[ndirac] = k; lu[ndirac] = l; prod[ndirac] = (qpb_complex){1.,0.}; ndirac++; } } break; case G5_G4G5: for(int i=0; i<NS; i++) for(int j=0; j<NS; j++) for(int k=0; k<NS; k++) for(int l=0; l<NS; l++) { if(i==j && CNORM(qpb_gamma_t[k][l]) > 0.5) { mu[ndirac] = i; nu[ndirac] = j; ku[ndirac] = k; lu[ndirac] = l; prod[ndirac] = qpb_gamma_t[k][l]; ndirac++; } } break; case G4G5_G5: for(int i=0; i<NS; i++) for(int j=0; j<NS; j++) for(int k=0; k<NS; k++) for(int l=0; l<NS; l++) { if(CNORM(qpb_gamma_t[i][j]) > 0.5 && k==l ) { mu[ndirac] = i; nu[ndirac] = j; ku[ndirac] = k; lu[ndirac] = l; prod[ndirac] = qpb_gamma_t[i][j]; ndirac++; } } break; case G4G5_G4G5: for(int i=0; i<NS; i++) for(int j=0; j<NS; j++) for(int k=0; k<NS; k++) for(int l=0; l<NS; l++) { if(CNORM(CMUL(qpb_gamma_t[i][j],qpb_gamma_t[k][l])) > 0.5 ) { mu[ndirac] = i; nu[ndirac] = j; ku[ndirac] = k; lu[ndirac] = l; prod[ndirac] = CMUL(qpb_gamma_t[i][j],qpb_gamma_t[k][l]); ndirac++; } } break; case G1_G1: for(int i=0; i<NS; i++) for(int j=0; j<NS; j++) { gamma_5x[i][j] = (qpb_complex){0., 0.}; for(int k=0; k<NS; k++) { gamma_5x[i][j].re += 
CMULR(qpb_gamma_5[i][k], qpb_gamma_x[k][j]); gamma_5x[i][j].im += CMULI(qpb_gamma_5[i][k], qpb_gamma_x[k][j]); } } for(int i=0; i<NS; i++) for(int j=0; j<NS; j++) for(int k=0; k<NS; k++) for(int l=0; l<NS; l++) { if(CNORM(CMUL(gamma_5x[i][j],gamma_5x[k][l])) > 0.5 ) { mu[ndirac] = i; nu[ndirac] = j; ku[ndirac] = k; lu[ndirac] = l; prod[ndirac] = CNEGATE(CMUL(gamma_5x[i][j],gamma_5x[k][l])); ndirac++; } } break; case G2_G2: for(int i=0; i<NS; i++) for(int j=0; j<NS; j++) { gamma_5y[i][j] = (qpb_complex){0., 0.}; for(int k=0; k<NS; k++) { gamma_5y[i][j].re += CMULR(qpb_gamma_5[i][k], qpb_gamma_y[k][j]); gamma_5y[i][j].im += CMULI(qpb_gamma_5[i][k], qpb_gamma_y[k][j]); } } for(int i=0; i<NS; i++) for(int j=0; j<NS; j++) for(int k=0; k<NS; k++) for(int l=0; l<NS; l++) { if(CNORM(CMUL(gamma_5y[i][j],gamma_5y[k][l])) > 0.5 ) { mu[ndirac] = i; nu[ndirac] = j; ku[ndirac] = k; lu[ndirac] = l; prod[ndirac] = CNEGATE(CMUL(gamma_5y[i][j],gamma_5y[k][l])); ndirac++; } } break; case G3_G3: for(int i=0; i<NS; i++) for(int j=0; j<NS; j++) { gamma_5z[i][j] = (qpb_complex){0., 0.}; for(int k=0; k<NS; k++) { gamma_5z[i][j].re += CMULR(qpb_gamma_5[i][k], qpb_gamma_z[k][j]); gamma_5z[i][j].im += CMULI(qpb_gamma_5[i][k], qpb_gamma_z[k][j]); } } for(int i=0; i<NS; i++) for(int j=0; j<NS; j++) for(int k=0; k<NS; k++) for(int l=0; l<NS; l++) { if(CNORM(CMUL(gamma_5z[i][j],gamma_5z[k][l])) > 0.5 ) { mu[ndirac] = i; nu[ndirac] = j; ku[ndirac] = k; lu[ndirac] = l; prod[ndirac] = CNEGATE(CMUL(gamma_5z[i][j],gamma_5z[k][l])); ndirac++; } } break; } for(int t=0; t<lt; t++) for(int lv=0; lv<lvol3d; lv++) corr_x[t][lv] = (qpb_complex){0., 0.}; for(int col0=0; col0<NC; col0++) for(int col1=0; col1<NC; col1++) for(int id=0; id<ndirac; id++) { int i = mu[id]; int j = nu[id]; int k = ku[id]; int l = lu[id]; #ifdef OPENMP # pragma omp parallel for #endif for(int t=0; t<lt; t++) for(int lv=0; lv<lvol3d; lv++) { int v = blk_to_ext[lv + t*lvol3d]; qpb_complex hp = ((qpb_complex 
*)(light[col0+NC*l].index[v]))[col1+NC*i]; qpb_complex lp = ((qpb_complex *)(heavy[col0+NC*k].index[v]))[col1+NC*j]; /* c = x * conj(y) */ qpb_complex c = {hp.re*lp.re + hp.im*lp.im, hp.im*lp.re - hp.re*lp.im}; corr_x[t][lv].re += CMULR(prod[id], c); corr_x[t][lv].im += CMULI(prod[id], c); } } qpb_ft(corr_k, corr_x, lt, mom, nmom); for(int t=0; t<lt; t++) for(int p=0; p<nmom; p++) corr[ich][p][t] = corr_k[t][p]; } FILE *fp = NULL; if(am_master) { if((fp = fopen(outfile, "w")) == NULL) { error("%s: error opening file in \"w\" mode\n", outfile); MPI_Abort(MPI_COMM_WORLD, QPB_FILE_ERROR); exit(QPB_FILE_ERROR); } } for(int t=0; t<lt; t++) { char ctag[QPB_MAX_STRING]; for(int p=0; p<nmom; p++) for(int ich=0; ich<QPB_N_MESON_2PT_CHANNELS; ich++) { switch(ich) { case S_S: strcpy(ctag ,"1-1"); break; case G5_G5: strcpy(ctag ,"g5-g5"); break; case G5_G4G5: strcpy(ctag ,"g5-g4g5"); break; case G4G5_G5: strcpy(ctag ,"g4g5-g5"); break; case G4G5_G4G5: strcpy(ctag ,"g4g5-g4g5"); break; case G1_G1: strcpy(ctag ,"g1-g1"); break; case G2_G2: strcpy(ctag ,"g2-g2"); break; case G3_G3: strcpy(ctag ,"g3-g3"); break; } if(am_master) fprintf(fp, " %+2d %+2d %+2d %3d %+e %+e %s\n", mom[p][3], mom[p][2], mom[p][1], t, corr[ich][p][t].re, corr[ich][p][t].im, ctag); } } if(am_master) fclose(fp); for(int t=0; t<lt; t++) { free(corr_x[t]); free(corr_k[t]); } free(corr_x); free(corr_k); for(int ich=0; ich<QPB_N_MESON_2PT_CHANNELS; ich++) { for(int p=0; p<nmom; p++) free(corr[ich][p]); free(corr[ich]); } free(mom); return; }
/* Determinant of a 3x3 complex matrix, expanded as the signed sum of the
   six products a[0][c0]*a[1][c1]*a[2][c2].
   FIX THIS - more efficient to take cross product of first two rows,
   dot with third. */
complex det_su3( su3_matrix *a ) {
  /* column picked from rows 0, 1, 2 for each term, in evaluation order */
  static const int cols[6][3] = {
    {0,1,2}, {0,2,1}, {1,2,0}, {1,0,2}, {2,0,1}, {2,1,0}
  };
  /* 1: term is added, 0: term is subtracted (entry 0 starts the sum) */
  static const int added[6] = { 1, 0, 1, 0, 1, 0 };
  complex pair, term, det;
  int t;

  /* the first term initialises the result directly */
  CMUL(a->e[0][cols[0][0]], a->e[1][cols[0][1]], pair);
  CMUL(pair, a->e[2][cols[0][2]], det);

  for(t=1; t<6; t++){
    CMUL(a->e[0][cols[t][0]], a->e[1][cols[t][1]], pair);
    CMUL(pair, a->e[2][cols[t][2]], term);
    if(added[t]){
      CADD(det, term, det);
    }
    else{
      CSUB(det, term, det);
    }
  }

  return(det);
}
/*
 * baryon_cont1: accumulate a three-propagator contraction into prop[][].
 *
 * For each site visited by FORALLSITES and each color pair (ci_3, cf_3):
 *   1. "diquark" is the sum, over every spin/color combination whose
 *      chi_b[][] and eps[][][] entries are nonzero, of
 *        eps_f*eps_i*chi_i*chi_f * src1[cf_1][sf_1][si_1][ci_1]
 *                                * src2[cf_2][sf_2][si_2][ci_2];
 *   2. the diquark is multiplied by the src3 element
 *      [cf_3][sf_3 = 1][si_3 = 1][ci_3];
 *   3. the real part of that product, weighted by cosines of the site
 *      coordinates, is added to prop[0..4][s->t].
 *
 * src1, src2, src3: field offsets, resolved per site through F_PT() and
 *                   cast to wilson_propagator.
 * chi_b:            integer weights indexed by a pair of spin indices;
 *                   zero entries are skipped.
 * eps:              integer weights indexed by three color indices;
 *                   zero entries are skipped.
 * prop:             output; only ever updated with +=, never cleared here,
 *                   so the caller must initialise it.
 *
 * NOTE(review): CMUL(a,b,c) is presumably c = a*b -- confirm against the
 * macro definition.
 */
void baryon_cont1(field_offset src1, field_offset src2, field_offset src3,
  int chi_b[4][4], int eps[3][3][3], Real prop[MAX_P][MAX_NT])
/* src1-3 are type wilson_propagator */
{
  register int i;
  register site *s;

  int my_t;
  int ci_1, ci_2, ci_3, si_1, si_2, si_3;
  int cf_1, cf_2, cf_3, sf_1, sf_2, sf_3;
  int chi_i, chi_f, eps_f, eps_i,j;
  Real factor;
  complex diquark, diquark_temp;
  /* for nonzero momentum */
  Real cx,cy,cz,cxy,cyz,cxz,c111;

  FORALLSITES(i,s){

    /* Second index of prop[][] that this site contributes to */
    my_t = s->t;

    /* Sum over source and sink colors of quark 3 */
    for(ci_3=0;ci_3<Nc;ci_3++)for(cf_3=0;cf_3<Nc;cf_3++){

      /* Fresh accumulator for every (ci_3, cf_3) pair */
      diquark = cmplx(0.0,0.0);

      /* Sum over source spins of quarks 1 and 2 */
      /* They will form the "di_quark" */
      for(si_1=0;si_1<Ns;si_1++)for(si_2=0;si_2<Ns;si_2++){
        chi_i = chi_b[si_1][si_2];
        if( chi_i != 0 ){

          /* Sum over sink spins of quarks 1 and 2 */
          for(sf_1=0;sf_1<Ns;sf_1++)for(sf_2=0;sf_2<Ns;sf_2++){
            chi_f = chi_b[sf_1][sf_2];
            if( chi_f != 0 ){

              /* Sum over source colors of quarks 1 and 2 */
              /* (ci_1 == ci_3 is skipped before eps[][][] is even read) */
              for(ci_1=0;ci_1<Nc;ci_1++) if(ci_1 != ci_3)
                for(ci_2=0;ci_2<Nc;ci_2++){
                  eps_i = eps[ci_1][ci_2][ci_3];
                  if( eps_i != 0 ){

                    /* Sum over sink colors of quarks 1 and 2 */
                    /* (cf_1 == cf_3 is skipped the same way) */
                    for(cf_1=0;cf_1<Nc;cf_1++) if(cf_1 != cf_3)
                      for(cf_2=0;cf_2<Nc;cf_2++){
                        eps_f = eps[cf_1][cf_2][cf_3];
                        if( eps_f != 0 ){

                          /* Combined integer weight of this term */
                          factor = (Real)(eps_f*eps_i*chi_i*chi_f);
                          /* Product of the src1 and src2 elements picked
                             out by the current indices */
                          CMUL(
                            ((wilson_propagator *)F_PT(s,src1))->c[cf_1].d[sf_1].d[si_1].c[ci_1],
                            ((wilson_propagator *)F_PT(s,src2))->c[cf_2].d[sf_2].d[si_2].c[ci_2],
                            diquark_temp);
                          diquark.real += factor*diquark_temp.real;
                          diquark.imag += factor*diquark_temp.imag;

                        } /* eps_f */
                      } /* sum cf_1, cf_2 */
                  } /* eps_i */
                } /* sum ci_1, ci_2 */
            } /* chi_f */
          } /* sum sf_1, sf_2 */
        } /* chi_i */
      } /* sum si_1, si_2 */

      /* Sum over source and sink spin of uncontracted quark 3 */
      /* Actually just use spin 1 */
      si_3 = sf_3 = 1;

      /* diquark_temp is reused here for diquark * (src3 element) */
      CMUL(diquark,
        ((wilson_propagator *)F_PT(s,src3))->c[cf_3].d[sf_3].d[si_3].c[ci_3],
        diquark_temp);

      /* prop[0..2]: weight by the average over x, y, z of
         cos(2*PI*j*coord/n); for j = 0 every cosine is 1, so the weight
         is 1 */
      for(j=0;j<3;j++){
        cz=cos(2.0*PI/(Real)nz*(Real)(s->z)*(Real)j);
        cx=cos(2.0*PI/(Real)nx*(Real)(s->x)*(Real)j);
        cy=cos(2.0*PI/(Real)ny*(Real)(s->y)*(Real)j);
        prop[j][my_t] += diquark_temp.real*(cx+cy+cz)/3.0;
      }

      /* prop[3], prop[4]: cosines of sums of two / all three coordinates.
         NOTE(review): all four cosines below divide by nz, whereas the
         per-axis terms above use nx, ny and nz -- presumably this relies
         on nx == ny == nz; confirm. */
      cxy=cos(2.0*PI/(Real)nz*(Real)(s->x +s->y));
      cxz=cos(2.0*PI/(Real)nz*(Real)(s->x +s->z));
      cyz=cos(2.0*PI/(Real)nz*(Real)(s->y +s->z));
      c111=cos(2.0*PI/(Real)nz*(Real)(s->x +s->y + s->z));

      prop[3][my_t] += diquark_temp.real*(cxy+cyz+cxz)/3.0;
      prop[4][my_t] += diquark_temp.real*c111;

      /* } */ /* sum sf_3, si_3 */
    } /* sum cf_3, ci_3 */
  } /* FORALLSITES */
/* NOTE(review): the brace closing baryon_cont1 itself does not appear after
   this point -- confirm that the definition is terminated. */
/**
 * Do a complex FFT with the parameters defined in ff_fft_init(). The
 * input data must be permuted before with s->revtab table. No
 * 1.0/sqrt(n) normalization is done.
 *
 * The transform is performed in place on the (1 << s->nbits) entries of z.
 *
 * @param s  FFT context; this function reads s->nbits, s->inverse and
 *           s->exptab.
 * @param z  buffer of 1 << s->nbits complex samples, overwritten with the
 *           result.
 *
 * Every pass uses a counted loop, so a pass whose trip count is zero
 * (transforms shorter than 4 points, i.e. nbits < 2) is skipped instead of
 * wrapping its counter and running butterflies past the end of z.
 *
 * NOTE(review): BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) is presumably
 * the butterfly p = p1 + q1, q = p1 - q1, and CMUL(re, im, are, aim, bre,
 * bim) the complex product a*b -- confirm against the macro definitions.
 */
void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
{
    int ln = s->nbits;
    int j, np, np2;
    int nblocks, nloops;
    register FFTComplex *p, *q;
    FFTComplex *exptab = s->exptab;
    int l;
    FFTSample tmp_re, tmp_im;

    np = 1 << ln;

    /* pass 0: butterflies on adjacent pairs (np / 2 of them) */
    p = &z[0];
    for (j = np >> 1; j > 0; j--) {
        BF(p[0].re, p[0].im, p[1].re, p[1].im,
           p[0].re, p[0].im, p[1].re, p[1].im);
        p += 2;
    }

    /* pass 1: butterflies at distance 2 on groups of four (np / 4 groups).
     * The second butterfly receives p[3] with its components swapped and one
     * of them negated; which one is negated depends on the direction. */
    p = &z[0];
    if (s->inverse) {
        for (j = np >> 2; j > 0; j--) {
            BF(p[0].re, p[0].im, p[2].re, p[2].im,
               p[0].re, p[0].im, p[2].re, p[2].im);
            BF(p[1].re, p[1].im, p[3].re, p[3].im,
               p[1].re, p[1].im, -p[3].im, p[3].re);
            p += 4;
        }
    } else {
        for (j = np >> 2; j > 0; j--) {
            BF(p[0].re, p[0].im, p[2].re, p[2].im,
               p[0].re, p[0].im, p[2].re, p[2].im);
            BF(p[1].re, p[1].im, p[3].re, p[3].im,
               p[1].re, p[1].im, p[3].im, -p[3].re);
            p += 4;
        }
    }

    /* pass 2 .. ln-1: each pass halves the number of blocks and doubles the
     * butterfly span (nloops). Nothing is left to do when np < 8. */
    nblocks = np >> 3;
    nloops = 1 << 2;
    np2 = np >> 1;
    while (nblocks != 0) {
        p = z;
        q = z + nloops;
        for (j = 0; j < nblocks; ++j) {
            /* first butterfly of the block: q is used without a twiddle */
            BF(p->re, p->im, q->re, q->im,
               p->re, p->im, q->re, q->im);

            p++;
            q++;
            /* remaining butterflies: q is multiplied by exptab[l], with l
             * stepping through the table in strides of nblocks */
            for (l = nblocks; l < np2; l += nblocks) {
                CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
                BF(p->re, p->im, q->re, q->im,
                   p->re, p->im, tmp_re, tmp_im);
                p++;
                q++;
            }

            /* skip the half that served as q in this block */
            p += nloops;
            q += nloops;
        }
        nblocks = nblocks >> 1;
        nloops = nloops << 1;
    }
}
/*
 * Inverse 1-D Discrete Cosine Transform, original Loeffler algorithm.
 *
 * Transforms the eight values Y[0..7] in place; the result is scaled up by
 * a factor of sqrt(8). Each stage reads only the previous stage's array and
 * writes only its own, and the final stage writes back into Y.
 *
 * The integer constants handed to CMUL() are the fixed-point forms of the
 * factors named in the rot()/sqrt(2) comments beside them.
 */
static void idct_1d(int *Y)
{
	int stage1[8], stage2[8], stage3[8];
	int k;

	/* Stage 1 */
	but(Y[0], Y[4], stage1[1], stage1[0]);
	/* rot(sqrt(2), 6, Y[2], Y[6]) -> stage1[2], stage1[3] */
	stage1[2] = SUB(CMUL(8867, Y[2]), CMUL(21407, Y[6]));
	stage1[3] = ADD(CMUL(21407, Y[2]), CMUL(8867, Y[6]));
	but(Y[1], Y[7], stage1[4], stage1[7]);
	/* stage1[5] = sqrt(2) * Y[3], stage1[6] = sqrt(2) * Y[5] */
	stage1[5] = CMUL(23170, Y[3]);
	stage1[6] = CMUL(23170, Y[5]);

	/* Stage 2 */
	but(stage1[0], stage1[3], stage2[3], stage2[0]);
	but(stage1[1], stage1[2], stage2[2], stage2[1]);
	but(stage1[4], stage1[6], stage2[6], stage2[4]);
	but(stage1[7], stage1[5], stage2[5], stage2[7]);

	/* Stage 3: entries 0..3 pass straight through ... */
	for (k = 0; k < 4; k++)
		stage3[k] = stage2[k];
	/* ... rot(1, 3, stage2[4], stage2[7]) -> stage3[4], stage3[7] */
	stage3[4] = SUB(CMUL(13623, stage2[4]), CMUL(9102, stage2[7]));
	stage3[7] = ADD(CMUL(9102, stage2[4]), CMUL(13623, stage2[7]));
	/* ... rot(1, 1, stage2[5], stage2[6]) -> stage3[5], stage3[6] */
	stage3[5] = SUB(CMUL(16069, stage2[5]), CMUL(3196, stage2[6]));
	stage3[6] = ADD(CMUL(3196, stage2[5]), CMUL(16069, stage2[6]));

	/* Stage 4 (final): results go back into Y */
	but(stage3[0], stage3[7], Y[7], Y[0]);
	but(stage3[1], stage3[6], Y[6], Y[1]);
	but(stage3[2], stage3[5], Y[5], Y[2]);
	but(stage3[3], stage3[4], Y[4], Y[3]);
}
/* Hadron wave functions. */ void wavefunc_t() { register int i,j,n; register site *s; register complex cc; msg_tag *tag; Real finalrsq,scale,x; int tmin,tmax,cgn,color; /* for baryon code */ int ca,ca1,ca2,cb,cb1,cb2; void symmetry_combine(field_offset src,field_offset space,int size,int dir); void block_fourier( field_offset src, /* src is field to be transformed */ field_offset space, /* space is working space, same size as src */ int size, /* Size of field in bytes. The field must consist of size/sizeof(complex) consecutive complex numbers. For example, an su3_vector is 3 complex numbers. */ int isign); /* 1 for x -> k, -1 for k -> x */ void fourier( field_offset src, /* src is field to be transformed */ field_offset space, /* space is working space, same size as src */ int size, /* Size of field in bytes. The field must consist of size/sizeof(complex) consecutive complex numbers. For example, an su3_vector is 3 complex numbers. */ int isign); /* 1 for x -> k, -1 for k -> x */ void write_wf(field_offset src,char *string,int tmin,int tmax); /* Fix TUP Coulomb gauge - gauge links only*/ rephase( OFF ); gaugefix(TUP,(Real)1.8,500,(Real)GAUGE_FIX_TOL); rephase( ON ); for(color=0;color<3;color++){ /* Make wall source */ FORALLSITES(i,s){ for(j=0;j<3;j++)s->phi.c[j]=cmplx(0.0,0.0); if( s->x%2==0 && s->y%2==0 && s->z%2==0 && s->t==0 ){ s->phi.c[color] = cmplx(-1.0,0.0); } } /* do a C.G. 
(source in phi, result in xxx) */ load_ferm_links(&fn_links); cgn = ks_congrad(F_OFFSET(phi),F_OFFSET(xxx),mass, niter, rsqprop, PRECISION, EVEN, &finalrsq, &fn_links); /* Multiply by -Madjoint, result in propmat[color] */ dslash_site( F_OFFSET(xxx), F_OFFSET(propmat[color]), ODD, &fn_links); scalar_mult_latvec( F_OFFSET(xxx), (Real)(-2.0*mass), F_OFFSET(propmat[color]), EVEN); } /* construct the diquark propagator--uses tempmat1 and do this before you fft the quark propagator */ FORALLSITES(i,s){ for(ca=0;ca<3;ca++)for(cb=0;cb<3;cb++){ ca1= (ca+1)%3; ca2= (ca+2)%3; cb1= (cb+1)%3; cb2= (cb+2)%3; CMUL((s->propmat[ca1].c[cb1]),(s->propmat[ca2].c[cb2]), (s->tempmat1.e[ca][cb])); CMUL((s->propmat[ca1].c[cb2]),(s->propmat[ca2].c[cb1]), cc); CSUB((s->tempmat1.e[ca][cb]),cc,(s->tempmat1.e[ca][cb])); } } /* complex conjugate the diquark prop */ FORALLSITES(i,s){ for(ca=0;ca<3;ca++)for(cb=0;cb<3;cb++){ CONJG((s->tempmat1.e[ca][cb]),(s->tempmat1.e[ca][cb])); } } /* Transform the diquark propagator. */ block_fourier( F_OFFSET(tempmat1), F_OFFSET(tempvec[0]), 3*sizeof(su3_vector), FORWARDS); /* complex conjugate the diquark prop. Now we have D(-k) for convolution */ FORALLSITES(i,s){ for(ca=0;ca<3;ca++)for(cb=0;cb<3;cb++){ CONJG((s->tempmat1.e[ca][cb]),(s->tempmat1.e[ca][cb])); } } /* Transform the propagator. */ block_fourier( F_OFFSET(propmat[0]), F_OFFSET(tempvec[0]), 3*sizeof(su3_vector), FORWARDS); /* CODE SPECIFIC TO PARTICULAR PARTICLES */ /* MESON CODE */ /* Square the result, component by component, sum over source and sink colors, result in ttt.c[0] */ FORALLSITES(i,s){ s->ttt.c[0].real = s->ttt.c[0].imag = 0.0; for(color=0;color<3;color++){ s->ttt.c[0].real += magsq_su3vec( &(s->propmat[color]) ); } }