Beispiel #1
0
/*
 * Half inverse MDCT built from 15-point FFTs combined with power-of-two FFTs.
 *
 * Input samples are read from src using the given stride, multiplied by
 * s->twiddle_exptab and gathered through s->pfa_prereindex into blocks of
 * 15 values for s->fft15.  After the power-of-two passes over s->tmp, the
 * values are fetched through s->pfa_postreindex, multiplied by the twiddles
 * again and stored to dst, which is accessed as an array of FFTComplex.
 */
static void imdct15_half(MDCT15Context *s, float *dst, const float *src,
                         ptrdiff_t stride)
{
    FFTComplex *out = (FFTComplex *)dst;
    FFTComplex column[15];
    const int half   = s->len4 >> 1;
    const int n_ptwo = 1 << s->ptwo_fft.nbits;
    const float *head = src;
    const float *tail = src + (s->len2 - 1) * stride;
    int row, col, n;

    /* Gather + pre-twiddle one block of 15 inputs, then run the 15-point FFT */
    for (row = 0; row < n_ptwo; row++) {
        for (col = 0; col < 15; col++) {
            const int k = s->pfa_prereindex[row*15 + col];
            const ptrdiff_t off = 2*k*stride;
            FFTComplex tmp = { *(tail - off), *(head + off) };

            CMUL3(column[col], tmp, s->twiddle_exptab[k]);
        }
        s->fft15(s->tmp + s->ptwo_fft.revtab[row], column, s->exptab, n_ptwo);
    }

    /* Fifteen power-of-two FFTs, one per contiguous run of n_ptwo values */
    for (n = 0; n < 15; n++)
        s->ptwo_fft.fft_calc(&s->ptwo_fft, s->tmp + n_ptwo*n);

    /* Post-twiddle, walking outwards from the middle of the output */
    for (n = 0; n < half; n++) {
        const int hi = half + n;
        const int lo = half - n - 1;
        const int from_hi = s->pfa_postreindex[hi];
        const int from_lo = s->pfa_postreindex[lo];

        CMUL(out[lo].re, out[hi].im, s->tmp[from_lo].im, s->tmp[from_lo].re,  s->twiddle_exptab[lo].im, s->twiddle_exptab[lo].re);
        CMUL(out[hi].re, out[lo].im, s->tmp[from_hi].im, s->tmp[from_hi].re,  s->twiddle_exptab[hi].im, s->twiddle_exptab[hi].re);
    }
}
Beispiel #2
0
/*
 * Core of the inverse MDCT: pre-rotate the input into tmp (in revtab
 * order), run the complex FFT in place and post-rotate the result.
 * The rotated values are left in tmp, viewed as FFTComplex; nothing else
 * is written.
 */
static void imdct_c(MDCTContext *s, const FFTSample *input, FFTSample *tmp)
{
    const int n  = 1 << s->nbits;
    const int n2 = n >> 1;
    const int n4 = n >> 2;
    const uint16_t *revtab = s->fft.revtab;
    const FFTSample *tcos  = s->tcos;
    const FFTSample *tsin  = s->tsin;
    FFTComplex *z = (FFTComplex *)tmp;
    int k;

    /* pre rotation: pair sample 2k from the front with sample 2k from the back */
    for (k = 0; k < n4; k++) {
        const int j = revtab[k];

        CMUL(z[j].re, z[j].im, input[n2 - 1 - 2*k], input[2*k], tcos[k], tsin[k]);
    }

    ff_fft_calc(&s->fft, z);

    /* post rotation, element by element and in place */
    for (k = 0; k < n4; k++) {
        CMUL(z[k].re, z[k].im, z[k].re, z[k].im, tcos[k], tsin[k]);
    }
}
Beispiel #3
0
/*
 * Momentum projection of n input fields.
 *
 * For every momentum p < nmom and every field i < n this accumulates
 *   out[i][p] = sum_lv  (cos(phi) - i sin(phi)) * in[i][lv]
 * over lv < l_vol / l_dim[0], where phi = 2*pi*(x*mom[p][3]/g_dim[3] +
 * y*mom[p][2]/g_dim[2] + z*mom[p][1]/g_dim[1]) and (x, y, z) are the
 * process offset coords[d]*l_dim[d] plus the local index.  mom[p][0] is
 * not read.  The partial sums are then summed over MPI_COMM_WORLD and the
 * total is stored back into out[i][p] on every rank.
 */
void
qpb_ft(qpb_complex **out, qpb_complex **in, int n, int mom[][4], int nmom)
{
  int lvol = problem_params.l_vol;
  int lt = problem_params.l_dim[0];
  int lvol3d = lvol/lt;
  double pi = atan(1.0)*4.0;
  unsigned short int *gdim = problem_params.g_dim;
  unsigned short int *ldim = problem_params.l_dim;
  unsigned short int *coords = problem_params.coords;

  for(int p=0; p<nmom; p++)
    {
      /* start every accumulator for this momentum from zero */
      for(int i=0; i<n; i++)
	out[i][p] = (qpb_complex){0., 0.};

      for(int lv=0; lv<lvol3d; lv++)
	{
	  /* local index plus this process' offset */
	  int x = coords[3]*ldim[3]+X_INDEX(lv, ldim);
	  int y = coords[2]*ldim[2]+Y_INDEX(lv, ldim);
	  int z = coords[1]*ldim[1]+Z_INDEX(lv, ldim);

	  qpb_double angle = (double)((double)x*mom[p][3]/gdim[3] +
				      (double)y*mom[p][2]/gdim[2] +
				      (double)z*mom[p][1]/gdim[1]);

	  angle = angle*2*pi;
	  qpb_complex phase = {cos(angle),-sin(angle)};
#ifdef OPENMP
#	pragma omp parallel for
#endif
	  for(int i=0; i<n; i++)
	    {
	      qpb_complex term = CMUL(phase, in[i][lv]);
	      out[i][p].re += term.re;
	      out[i][p].im += term.im;
	    }
	}
    }

  /*
   * Global sum, done outside of OpenMP: reduce onto rank 0, then
   * broadcast the total back to every rank.
   */
  for(int p=0; p<nmom; p++)
    {
      for(int i=0; i<n; i++)
	{
	  qpb_complex total;
	  MPI_Reduce(&out[i][p].re, &total.re, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
	  MPI_Reduce(&out[i][p].im, &total.im, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
	  MPI_Bcast(&total.re, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	  MPI_Bcast(&total.im, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	  out[i][p] = total;
	}
    }

  return;
}
Beispiel #4
0
/*
 * Half inverse MDCT for CELT.
 *
 * Strided input from src is pre-twiddled into s->tmp, transformed by
 * fft_calc() into dst (accessed as FFTComplex), then post-twiddled in
 * place and multiplied by scale.
 */
static void celt_imdct_half(CeltIMDCTContext *s, float *dst, const float *src,
                            ptrdiff_t stride, float scale)
{
    FFTComplex *z = (FFTComplex *)dst;
    const int len8 = s->len4 / 2;
    const float *last = src + (s->len2 - 1) * stride;
    int i;

    /* pre-twiddle: pair samples taken from both ends of the input */
    for (i = 0; i < s->len4; i++) {
        const ptrdiff_t step = 2 * stride * i;
        FFTComplex tmp = { *(last - step), *(src + step) };

        CMUL(s->tmp[i], tmp, s->twiddle_exptab[i]);
    }

    fft_calc(s, z, s->tmp, s->fft_n, 1);

    /* post-twiddle and scale, working outwards from the middle */
    for (i = 0; i < len8; i++) {
        const int lo = len8 - i - 1;
        const int hi = len8 + i;
        float r0, i0, r1, i1;

        CMUL3(r0, i1, z[lo].im, z[lo].re,  s->twiddle_exptab[lo].im, s->twiddle_exptab[lo].re);
        CMUL3(r1, i0, z[hi].im, z[hi].re,  s->twiddle_exptab[hi].im, s->twiddle_exptab[hi].re);

        z[lo].re = scale * r0;
        z[lo].im = scale * i0;
        z[hi].re = scale * r1;
        z[hi].im = scale * i1;
    }
}
Beispiel #5
0
/*
 * Multiply a Wilson vector by a complex scalar:
 *
 *   dest->d[i].c[j] = src->d[i].c[j] * (*phase)    for i = 0..3, j = 0..2
 *
 * Two compile-time variants exist: the generic one uses the CMUL macro,
 * the FAST one spells the arithmetic out with register temporaries.
 * In both variants each source component is consumed completely before the
 * product is stored, so dest may be the same object as src.
 */
void c_scalar_mult_wvec(wilson_vector *src, complex *phase,
			wilson_vector *dest) {

#ifndef FAST
register int i,j;
complex t;
    for(i=0;i<4;i++){
           for(j=0;j<3;j++){
		/* Build the product in a temporary first: if CMUL stores the
		   real part before it reads the operands for the imaginary
		   part, writing straight into dest would corrupt the result
		   whenever dest aliases src. */
		CMUL( src->d[i].c[j], *phase, t );
		dest->d[i].c[j] = t;
           }
    }

#else
register int i,j;
#ifdef NATIVEDOUBLE
register double sr,si,br,bi;
#else
register float sr,si,br,bi;
#endif

    /* scalar factor, loaded once */
    sr = (*phase).real; si = (*phase).imag;

    for(i=0;i<4;i++){
	for(j=0;j<3;j++){
	    /* read both parts of the source before writing the result */
	    br=src->d[i].c[j].real; bi=src->d[i].c[j].imag;

	    dest->d[i].c[j].real = sr*br - si*bi;
	    dest->d[i].c[j].imag = sr*bi + si*br;
	}
    }
#endif
}
Beispiel #6
0
/*
 * FFT of the length 15 * (2^N)
 *
 * Recursive split: N == 0 is handled by fft15(); otherwise the even and odd
 * input samples (selected through the doubled stride) are transformed into
 * the lower and upper half of out and then combined with one butterfly
 * pass using s->exptab[N].
 */
static void fft_calc(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in,
                     int N, ptrdiff_t stride)
{
    const FFTComplex *exptab;
    FFTComplex *upper;
    int half, k;

    if (!N) {
        fft15(s, out, in, stride);
        return;
    }

    exptab = s->exptab[N];
    half   = 15 * (1 << (N - 1));
    upper  = out + half;

    fft_calc(s, out,   in,          N - 1, stride * 2);
    fft_calc(s, upper, in + stride, N - 1, stride * 2);

    for (k = 0; k < half; k++) {
        FFTComplex t;

        CMUL(t, upper[k], exptab[k]);

        /* upper = lower - t must be formed before lower is updated */
        upper[k].re = out[k].re - t.re;
        upper[k].im = out[k].im - t.im;

        out[k].re += t.re;
        out[k].im += t.im;
    }
}
Beispiel #7
0
/*
 * Apply the block-diagonal factor D to X in place.
 *
 * For every j < n the code reads eight doubles D[8*j .. 8*j+7] and four
 * doubles X[4*j .. 4*j+3] as (re, im) pairs and overwrites the X entries
 * with the values built by CMUL/CADDPROD below.
 * NOTE(review): CMUL(a, b, out) and CADDPROD(acc, a, b) are presumably
 * out = a*b and acc += a*b, which makes this a complex 2x2 block times a
 * complex 2-vector with the block stored column by column -- confirm
 * against the macro definitions.
 */
void LDL_dsolve(
	int n,		/* number of blocks, n >= 0 */
	double X[],	/* right-hand side on input, solution on output */
	double D[]	/* block data, not modified; already inverted by the caller */
){
	int blk;
	for (blk = 0; blk < n; blk++){
		double *x_top = &X[4*blk+0];
		double *x_bot = &X[4*blk+2];
		double *d0 = &D[8*blk+0];
		double *d1 = &D[8*blk+2];
		double *d2 = &D[8*blk+4];
		double *d3 = &D[8*blk+6];
		double new_top[2], new_bot[2];

		/* both results are formed before X is overwritten */
		CMUL(d0, x_top, new_top);
		CADDPROD(new_top, d2, x_bot);
		CMUL(d1, x_top, new_bot);
		CADDPROD(new_bot, d3, x_bot);
		CSET(x_top, new_top);
		CSET(x_bot, new_bot);
	}
}
Beispiel #8
0
/**
 * Compute inverse MDCT of size N = 2^nbits
 * @param output N samples
 * @param input N/2 samples
 * @param tmp N/2 samples
 */
void ff_imdct_calc(MDCTContext *s, FFTSample *output,
                   const FFTSample *input, FFTSample *tmp)
{
    const int n  = 1 << s->nbits;
    const int n2 = n >> 1;
    const int n4 = n >> 2;
    const int n8 = n >> 3;
    const uint16_t *revtab = s->fft.revtab;
    const FFTSample *tcos  = s->tcos;
    const FFTSample *tsin  = s->tsin;
    FFTComplex *z = (FFTComplex *)tmp;
    int k;

    /* pre rotation: front sample 2k paired with back sample 2k */
    for (k = 0; k < n4; k++) {
        const int j = revtab[k];

        CMUL(z[j].re, z[j].im, input[n2 - 1 - 2*k], input[2*k], tcos[k], tsin[k]);
    }
    fft_calc(&s->fft, z);

    /* post rotation, in place */
    for (k = 0; k < n4; k++) {
        CMUL(z[k].re, z[k].im, z[k].re, z[k].im, tcos[k], tsin[k]);
    }

    /* reordering: each z value is written twice, mirrored within its half */
    for (k = 0; k < n8; k++) {
        const int up   = n8 + k;
        const int down = n8 - 1 - k;

        output[2*k]          = -z[up].im;
        output[n2 - 1 - 2*k] =  z[up].im;

        output[2*k + 1]      =  z[down].re;
        output[n2 - 2 - 2*k] = -z[down].re;

        output[n2 + 2*k]     = -z[up].re;
        output[n - 1 - 2*k]  = -z[up].re;

        output[n2 + 2*k + 1] =  z[down].im;
        output[n - 2 - 2*k]  =  z[down].im;
    }
}
Beispiel #9
0
/**
 * Compute MDCT of size N = 2^nbits
 * @param input N samples
 * @param out N/2 samples
 */
void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input)
{
    const int n  = 1 << s->mdct_bits;
    const int n2 = n >> 1;
    const int n4 = n >> 2;
    const int n8 = n >> 3;
    const int n3 = 3 * n4;
    const uint16_t *revtab = s->revtab;
    const FFTSample *tcos  = s->tcos;
    const FFTSample *tsin  = s->tsin;
    FFTComplex *x = (FFTComplex *)out;
    int i;

    /* pre rotation: each pass fills one slot in each half of the table */
    for (i = 0; i < n8; i++) {
        FFTDouble re, im;
        int j;

        re = RSCALE(-input[2*i+n3] - input[n3-1-2*i]);
        im = RSCALE(-input[n4+2*i] + input[n4-1-2*i]);
        j  = revtab[i];
        CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]);

        re = RSCALE( input[2*i]    - input[n2-1-2*i]);
        im = RSCALE(-input[n2+2*i] - input[ n-1-2*i]);
        j  = revtab[n8 + i];
        CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
    }

    s->fft_calc(s, x);

    /* post rotation: the imaginary parts of the mirrored pair swap places */
    for (i = 0; i < n8; i++) {
        const int lo = n8 - i - 1;
        const int hi = n8 + i;
        FFTSample r0, i0, r1, i1;

        CMUL(i1, r0, x[lo].re, x[lo].im, -tsin[lo], -tcos[lo]);
        CMUL(i0, r1, x[hi].re, x[hi].im, -tsin[hi], -tcos[hi]);

        x[lo].re = r0;
        x[lo].im = i0;
        x[hi].re = r1;
        x[hi].im = i1;
    }
}
Beispiel #10
0
/*
 * Forward MDCT built from 15-point FFTs combined with power-of-two FFTs.
 *
 * src is read contiguously and folded; the folded values are pre-twiddled
 * and gathered through s->pfa_prereindex for s->fft15.  After the
 * power-of-two passes over s->tmp the values are fetched through
 * s->pfa_postreindex, twiddled again and written to dst using stride.
 */
static void mdct15(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride)
{
    const int len4 = s->len4;
    const int len3 = len4 * 3;
    const int len8 = len4 >> 1;
    const int n_ptwo = 1 << s->ptwo_fft.nbits;
    FFTComplex column[15];
    int row, col, n;

    /* Folding and pre-reindexing */
    for (row = 0; row < n_ptwo; row++) {
        for (col = 0; col < 15; col++) {
            const int k = s->pfa_prereindex[row*15 + col];
            float re, im;

            if (k >= len8) {
                re =  src[2*k-len4] - src[1*len3-1-2*k];
                im = -src[2*k+len4] - src[5*len4-1-2*k];
            } else {
                re = -src[2*k+len3] - src[len3-1-2*k];
                im = -src[len4+2*k] + src[len4-1-2*k];
            }
            CMUL(column[col].re, column[col].im, re, im, s->twiddle_exptab[k].re, -s->twiddle_exptab[k].im);
        }
        s->fft15(s->tmp + s->ptwo_fft.revtab[row], column, s->exptab, n_ptwo);
    }

    /* Fifteen power-of-two FFTs, one per contiguous run of n_ptwo values */
    for (n = 0; n < 15; n++)
        s->ptwo_fft.fft_calc(&s->ptwo_fft, s->tmp + n_ptwo*n);

    /* Reindex again, apply twiddles and output */
    for (n = 0; n < len8; n++) {
        const int hi = len8 + n;
        const int lo = len8 - n - 1;
        const int from_hi = s->pfa_postreindex[hi];
        const int from_lo = s->pfa_postreindex[lo];
        float re_lo, im_lo, re_hi, im_hi;

        CMUL(im_hi, re_lo, s->tmp[from_lo].re, s->tmp[from_lo].im, s->twiddle_exptab[lo].im, s->twiddle_exptab[lo].re);
        CMUL(im_lo, re_hi, s->tmp[from_hi].re, s->tmp[from_hi].im, s->twiddle_exptab[hi].im, s->twiddle_exptab[hi].re);

        dst[2*lo*stride         ] = re_lo;
        dst[2*lo*stride + stride] = im_lo;
        dst[2*hi*stride         ] = re_hi;
        dst[2*hi*stride + stride] = im_hi;
    }
}
/*
 * Matrix times vector: c->c[i] = sum_j a->e[i][j] * b->c[j], i, j = 0..2.
 * Each row sum is built in a local accumulator and stored when complete.
 */
void mult_su3_mat_vec( su3_matrix *a, su3_vector *b, su3_vector *c  ){
    register int row, col;
    register complex acc, term;

    for (row = 0; row < 3; row++) {
        acc.real = acc.imag = 0.0;
        for (col = 0; col < 3; col++) {
            CMUL( a->e[row][col], b->c[col], term );
            CSUM( acc, term );
        }
        c->c[row] = acc;
    }
}
Beispiel #12
0
/**
 * Compute MDCT of size N = 2^nbits
 * @param input N samples
 * @param out N/2 samples
 * @param tmp temporary storage of N/2 samples
 */
void ff_mdct_calc(MDCTContext *s, FFTSample *out,
                  const FFTSample *input, FFTSample *tmp)
{
    const int n  = 1 << s->nbits;
    const int n2 = n >> 1;
    const int n4 = n >> 2;
    const int n8 = n >> 3;
    const int n3 = 3 * n4;
    const uint16_t *revtab = s->fft.revtab;
    const FFTSample *tcos  = s->tcos;
    const FFTSample *tsin  = s->tsin;
    FFTComplex *x = (FFTComplex *)tmp;
    int i;

    /* pre rotation: each pass fills one slot in each half of the table */
    for (i = 0; i < n8; i++) {
        FFTSample re, im;
        int j;

        re = -input[2*i+3*n4] - input[n3-1-2*i];
        im = -input[n4+2*i] + input[n4-1-2*i];
        j  = revtab[i];
        CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]);

        re = input[2*i] - input[n2-1-2*i];
        im = -(input[n2+2*i] + input[n-1-2*i]);
        j  = revtab[n8 + i];
        CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
    }

    ff_fft_calc(&s->fft, x);

    /* post rotation: results go to the front and the mirrored back of out */
    for (i = 0; i < n4; i++) {
        FFTSample re  = x[i].re;
        FFTSample im  = x[i].im;
        FFTSample re1, im1;

        CMUL(re1, im1, re, im, -tsin[i], -tcos[i]);
        out[2*i]      = im1;
        out[n2-1-2*i] = re1;
    }
}
Beispiel #13
0
/*
 * 15-point FFT assembled from three 5-point FFTs taken over every third
 * input sample.  Each of the five partial results feeds three outputs
 * (k, k + 5, k + 10); the outputs differ only in which entries of
 * s->exptab[0] multiply the second and third sub-transform.
 */
static void fft15(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in, ptrdiff_t stride)
{
    /* exptab offsets added to k (first factor) and 2*k (second factor) */
    static const int rot1[3] = { 0,  5, 10 };
    static const int rot2[3] = { 0, 10,  5 };
    const FFTComplex *exptab = s->exptab[0];
    FFTComplex sub0[5];
    FFTComplex sub1[5];
    FFTComplex sub2[5];
    int k, g;

    fft5(sub0, in,              stride * 3);
    fft5(sub1, in +     stride, stride * 3);
    fft5(sub2, in + 2 * stride, stride * 3);

    for (k = 0; k < 5; k++) {
        for (g = 0; g < 3; g++) {
            FFTComplex t1, t2;

            CMUL(t1, sub1[k], exptab[k + rot1[g]]);
            CMUL(t2, sub2[k], exptab[2 * k + rot2[g]]);
            out[k + 5 * g].re = sub0[k].re + t1.re + t2.re;
            out[k + 5 * g].im = sub0[k].im + t1.im + t2.im;
        }
    }
}
Beispiel #14
0
/******************************************************************************
FUNCTION:
   check_unitarity

   Sums e * conj(e) over the two entries of the first column of *psub and
   prints the real and imaginary part of the sum when it differs from
   (1, 0) by more than 1e-4 in either component.
******************************************************************************/
void check_unitarity(su2_matrix *psub)
{
   const Real tol = 0.0001;
   complex norm;
   complex cc;
   complex term;
   int row;

   norm.real = 0.0;
   norm.imag = 0.0;
   for (row = 0; row < 2; row++)
   {
      CONJG(psub->e[row][0], cc);
      CMUL(psub->e[row][0], cc, term);
      CSUM(norm, term);
   }

   if (norm.real < 1.0 - tol || norm.real > 1.0 + tol
       || norm.imag < -tol || norm.imag > tol)
   {
      printf("%g\t%g\n", norm.real, norm.imag);
   }
}
Beispiel #15
0
/**
 * Compute the middle half of the inverse MDCT of size N = 2^nbits,
 * thus excluding the parts that can be derived by symmetry
 * @param output N/2 samples
 * @param input N/2 samples
 */
void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input)
{
    const int n  = 1 << s->mdct_bits;
    const int n2 = n >> 1;
    const int n4 = n >> 2;
    const int n8 = n >> 3;
    const uint16_t *revtab = s->revtab;
    const FFTSample *tcos  = s->tcos;
    const FFTSample *tsin  = s->tsin;
    FFTComplex *z = (FFTComplex *)output;
    int k;

    /* pre rotation: front sample 2k paired with back sample 2k */
    for (k = 0; k < n4; k++) {
        const int j = revtab[k];

        CMUL(z[j].re, z[j].im, input[n2 - 1 - 2*k], input[2*k], tcos[k], tsin[k]);
    }

    s->fft_calc(s, z);

    /* post rotation + reordering: the imaginary parts of the mirrored pair
     * swap places */
    for (k = 0; k < n8; k++) {
        const int lo = n8 - k - 1;
        const int hi = n8 + k;
        FFTSample r0, i0, r1, i1;

        CMUL(r0, i1, z[lo].im, z[lo].re, tsin[lo], tcos[lo]);
        CMUL(r1, i0, z[hi].im, z[hi].re, tsin[hi], tcos[hi]);

        z[lo].re = r0;
        z[lo].im = i0;
        z[hi].re = r1;
        z[hi].im = i1;
    }
}
Beispiel #16
0
/*
 * Matrix product c = a * b for 3x3 complex matrices:
 *   c->e[i][j] = sum_k a->e[i][k] * b->e[k][j]
 * The accumulator takes the element type of the operands via typeof.
 */
void
llfat_mult_su3_nn( su3_matrix *a, su3_matrix *b, su3_matrix *c )
{
    int row, col, m;
    typeof(a->e[0][0]) acc, term;

    for (row = 0; row < 3; row++) {
        for (col = 0; col < 3; col++) {
            acc.real = acc.imag = 0.0;
            for (m = 0; m < 3; m++) {
                CMUL( a->e[row][m], b->e[m][col], term );
                CSUM( acc, term );
            }
            c->e[row][col] = acc;
        }
    }
}
Beispiel #17
0
/* Real part of the trace of a matrix product, ReTr(A*B):
   the sum over i, j of Re( a->e[i][j] * b->e[j][i] ). */
Real
real_trace_nn( su3_matrix *a, su3_matrix *b )
{
  register int row, col;
  register complex term;
  register Real total = 0.0;

  for( row=0; row<3; row++ ) {
    for( col=0; col<3; col++ ) {
      CMUL( a->e[row][col], b->e[col][row], term );
      total += term.real;
    }
  }

  return total;
}
Beispiel #18
0
/**
 * Half inverse MDCT: pre-rotate the input into output (in the FFT's
 * bit-reversal order), run the FFT in place, then post-rotate in place.
 * output is accessed as an array of Complex.
 */
void MDCT::calcHalfIMDCT(float *output, const float *input) {
	Complex *z = (Complex *) output;

	const int half    = _size >> 1;
	const int quarter = _size >> 2;
	const int eighth  = _size >> 3;

	const uint16 *revTab = _fft->getRevTab();

	// Pre rotation: front sample 2k paired with back sample 2k
	for (int k = 0; k < quarter; k++) {
		const int j = revTab[k];

		CMUL(z[j].re, z[j].im, input[half - 1 - 2 * k], input[2 * k], _tCos[k], _tSin[k]);
	}

	_fft->calc(z);

	// Post rotation + reordering: the imaginary parts of the mirrored pair swap places
	for (int k = 0; k < eighth; k++) {
		const int lo = eighth - k - 1;
		const int hi = eighth + k;
		float r0, i0, r1, i1;

		CMUL(r0, i1, z[lo].im, z[lo].re, _tSin[lo], _tCos[lo]);
		CMUL(r1, i0, z[hi].im, z[hi].re, _tSin[hi], _tCos[hi]);

		z[lo].re = r0;
		z[lo].im = i0;
		z[hi].re = r1;
		z[hi].im = i1;
	}
}
Beispiel #19
0
/******************************************************************************
FUNCTION:
   mult_su2

   2x2 complex matrix product: *pprod = (*pmat1) * (*pmat2).
   Each output entry is cleared and then accumulated directly in *pprod.
******************************************************************************/
void mult_su2(su2_matrix *pprod, su2_matrix *pmat1, su2_matrix *pmat2)
{
   int row;                     /* row of the product */
   int col;                     /* column of the product */
   int m;                       /* index summed over */
   complex piece;               /* one row-times-column term */

   for (row = 0; row < 2; row++)
   {
      for (col = 0; col < 2; col++)
      {
         pprod->e[row][col].real = 0.0;
         pprod->e[row][col].imag = 0.0;
         for (m = 0; m < 2; m++)
         {
            CMUL(pmat1->e[row][m], pmat2->e[m][col], piece);
            CSUM(pprod->e[row][col], piece);
         }
      }
   }
}
Beispiel #20
0
// In-place inverse of a complex 2x2 matrix stored as eight doubles:
// z[0],z[1] is (1,1) entry of matrix
// z[2],z[3] is (2,1) entry of matrix, etc.
// With the layout
//   [a c]
//   [b d]
// the result is the adjugate [d -c; -b a] scaled by 1/(ad - bc).
// No check is made for a vanishing determinant.
static void invert_c2x2(double z[]){
	double *a = &z[2*0];
	double *b = &z[2*1];
	double *c = &z[2*2];
	double *d = &z[2*3];
	double idet[2], tmp[2];
	int e;

	// idet = 1 / (ad - bc)
	CMUL(a, d, idet);
	CMUL(b, c, tmp);
	CSUB(idet,tmp);
	CINV(idet);

	// adjugate: swap the diagonal, negate the off-diagonal
	CSWP(a, d);
	CNEG(b);
	CNEG(c);

	// scale every entry by idet
	for (e = 0; e < 4; e++){
		CMUL(&z[2*e], idet, tmp);
		CSET(&z[2*e], tmp);
	}
}
Beispiel #21
0
/*
 * Half inverse MDCT written with paired-single intrinsics (psq_* loads and
 * stores, paired_merge*).  Three phases are visible: a pre-rotation whose
 * results are stored into output at offsets taken from revtab, an in-place
 * FFT on output through s->fft_calc, and a post-rotation that reads and
 * writes output starting at its middle (output + n4).
 *
 * Both loops advance k by 2, i.e. each pass handles two values from each
 * end of the range being processed.
 *
 * NOTE(review): the semantics of psq_lu/psq_l/psq_st/psq_stu/psq_stx,
 * paired_merge* and the 8-argument CMUL are defined elsewhere.  The
 * comments below assume the "u" variants add the byte offset to the base
 * pointer (passed as an lvalue) before accessing memory -- confirm.
 */
void ff_imdct_half_paired(FFTContext *s, FFTSample *output, const FFTSample *input)
{
	int n = 1 << s->mdct_bits;
	int n2 = n >> 1;
	int n4 = n >> 2;
	int n8 = n >> 3;
	
	const uint16_t *revtab = s->revtab;
	const FFTSample *tcos = s->tcos;
	const FFTSample *tsin = s->tsin;
	
	vector float pair[4], sub[2], add[2];
	vector float result, cos, sin;
	
	/*
	 * Cursor table.  Rows 0-2 (input, tcos, tsin) are used by the
	 * pre-rotation, rows 3-5 (output, tcos, tsin) by the post-rotation.
	 * Column 0 and column 1 are accessed with opposite-sign offsets.
	 * The starting values sit one step (2 samples) outside the first
	 * element touched, which matches a pre-update access pattern.
	 * NOTE(review): the element type is FFTSample * although input, tcos
	 * and tsin are const-qualified, so the initialiser drops const.
	 */
	FFTSample *base[6][2] = {{input-2,input+n2},{tcos-2,tcos+n4},{tsin-2,tsin+n4},
					{output+n4,output+n4-2},{tcos+n8,tcos+n8-2},{tsin+n8,tsin+n8-2}};
	
	int k, j;
	/* pre rotation: k walks up from 0, j walks down from n4-2 */
	for (k=0, j=n4-2; k<n8; k+=2, j-=2) {
		/* two consecutive loads through the same forward input cursor;
		   presumably they return successive sample pairs -- confirm */
 		pair[0] = psq_lu(8,base[0][0],0,0);
		pair[1] = psq_lu(8,base[0][0],0,0);
		
		/* same from the backward input cursor */
		pair[2] = psq_lu(-8,base[0][1],0,0);
		pair[3] = psq_lu(-8,base[0][1],0,0);
		
		/* twiddles for the forward pair */
		cos = psq_lu(8,base[1][0],0,0);
		sin = psq_lu(8,base[2][0],0,0);
		
		CMUL(sub[0],add[0],pair[0],pair[1],pair[2],pair[3],sin,cos);
		
		/* twiddles for the backward pair; operands passed in reverse order */
		cos = psq_lu(-8,base[1][1],0,0);
		sin = psq_lu(-8,base[2][1],0,0);
		
		CMUL(sub[1],add[1],pair[3],pair[2],pair[1],pair[0],sin,cos);
		
		/* scatter into output at revtab[...]*8 byte offsets (8 is
		   presumably sizeof(FFTComplex) for float samples) */
		result = paired_merge00(sub[0], add[0]);
		psq_stx(result,revtab[k]*8,output,0,0);
		result = paired_merge11(sub[0], add[0]);
		psq_stx(result,revtab[k+1]*8,output,0,0);
		
		result = paired_merge00(sub[1], add[1]);
		psq_stx(result,revtab[j]*8,output,0,0);
		result = paired_merge11(sub[1], add[1]);
		psq_stx(result,revtab[j+1]*8,output,0,0);
	}
	
	/* in-place FFT over output viewed as FFTComplex */
	s->fft_calc(s, (FFTComplex *)output);
	
	/* post rotation + reordering, in place */
	for (k=0; k<n8; k+=2) {
		/* downward side: one updating load, then a non-updating load
		   at a further -8 offset from the same cursor */
		pair[0] = psq_lu(-8,base[3][0],0,0);
		pair[1] = psq_l(-8,base[3][0],0,0);
		
		cos = psq_lu(-8,base[4][0],0,0);
		sin = psq_lu(-8,base[5][0],0,0);
		
		/* note the crossed outputs: sub[0] with add[1] here ... */
		CMUL(sub[0],add[1],pair[1],pair[0],pair[1],pair[0],cos,sin);
		
		/* upward side, mirrored */
		pair[0] = psq_lu(8,base[3][1],0,0);
		pair[1] = psq_l(8,base[3][1],0,0);
		
		cos = psq_lu(8,base[4][1],0,0);
		sin = psq_lu(8,base[5][1],0,0);
		
		/* ... and sub[1] with add[0] here */
		CMUL(sub[1],add[0],pair[0],pair[1],pair[0],pair[1],cos,sin);
		
		/* store back through the same cursors that were loaded from;
		   the updating store moves the cursor for the next pass */
		result = paired_merge10(sub[0], add[0]);
		psq_st(result,0,base[3][0],0,0);
		result = paired_merge01(sub[0], add[0]);
		psq_stu(result,-8,base[3][0],0,0);
		
		result = paired_merge01(sub[1], add[1]);
		psq_st(result,0,base[3][1],0,0);
		result = paired_merge10(sub[1], add[1]);
		psq_stu(result,8,base[3][1],0,0);
	}
}
Beispiel #22
0
/*
 * Accumulate momentum-projected meson correlators into prop[].
 *
 * For every lattice site, every entry of gamma_table and every stored
 * momentum, the spin/colour contraction of the gamma-multiplied antiquark
 * (built from src1) with the quark (src2) times a plane-wave phase is
 * added to one element of prop[].
 *
 *   prop          accumulator; only added to, never cleared here
 *   src1, src2    site field offsets, both read as spin_wilson_vector
 *   base_pt       constant offset into prop[]
 *   q_stride      prop[] stride between successive momenta
 *   op_stride     prop[] stride between successive gamma_table entries
 *   gamma_table   source (gin) / sink (gout) gamma pairs
 *   no_gamma_corr number of entries in gamma_table
 *
 * Reads the globals nx, ny, nz, no_q_values and q_momstore.
 *
 * NOTE(review): this excerpt stops right after the site loop; the
 * function's closing brace is not visible here.
 */
void meson_cont_mom(complex prop[],
		    field_offset src1,field_offset src2,
		    int base_pt, int q_stride, int op_stride,
		    gamma_corr gamma_table[], int no_gamma_corr)
{
  register int i;
  register site *s; 
  
  double theta ; 
  /* 2*pi / L for each spatial direction */
  double factx = 2.0*PI/(1.0*nx) ; 
  double facty = 2.0*PI/(1.0*ny) ; 
  double factz = 2.0*PI/(1.0*nz) ; 
  Real px,py,pz;
  complex phase_fact ; 
  
  int my_t;
  int cf, sf, si;
  int i_gamma_corr,q_pt,prop_pt;
  
  complex g1,g2;
  
  spin_wilson_vector localmat,localmat2;  /* temporary storage */
  spin_wilson_vector quark;               /* temporary storage for quark */
  spin_wilson_vector antiquark;           /* temporary storage for antiquark */
  
  
  FORALLSITES(i,s)
    {
      
      /* time coordinate of this site selects the prop[] slot */
      my_t = s->t;
      
      /* copy src2 into quark */
      for(si=0;si<4;si++)
	for(sf=0;sf<4;sf++)
	  for(cf=0;cf<3;cf++)
	    {
	      quark.d[si].d[sf].c[cf] = 
		((spin_wilson_vector *)F_PT(s,src2))->d[si].d[sf].c[cf];
	    }
      
      /* next, construct antiquark from src1 */
      /*first, dirac multiplication by the source gamma matrices (on left) */
      
      /*  antiquark = c.c. of quark propagator */
      /* note the swapped spin indices on the destination: [sf][si] */
      for(si=0;si<4;si++)
	for(sf=0;sf<4;sf++)
	  for(cf=0;cf<3;cf++)
	    {
	      
	      CONJG(((spin_wilson_vector *)F_PT(s,src1))->d[si].d[sf].c[cf],
		    antiquark.d[sf].d[si].c[cf]); 
	      
	    }
      
      
      /* left multiply antiquark by source gamma matrices,
	 beginning with gamma_5 for quark -> antiquark */
      mult_sw_by_gamma_l( &antiquark, &localmat, G5);    
      
      /* right dirac multiplication by gamma-5 (finishing up antiquark) */
      mult_sw_by_gamma_r( &localmat, &antiquark, G5);     
      
      /* Run through the table of source-sink gamma matrices */
      for(i_gamma_corr=0; i_gamma_corr<no_gamma_corr; i_gamma_corr++)
	{
	  /* left multiply by the particular source dirac matrices */
	  /* result in localmat2 */
	  
	  mult_sw_by_gamma_l( &antiquark, &localmat, 
			     gamma_table[i_gamma_corr].gin);
	  
	  mult_sw_by_gamma_r( &localmat, &localmat2,
			     gamma_table[i_gamma_corr].gout);
	  
	  /* Run through all sink momenta */
	  for(q_pt=0; q_pt<no_q_values; q_pt++)
	    {
	      px = q_momstore[q_pt][0];
	      py = q_momstore[q_pt][1];
	      pz = q_momstore[q_pt][2];
	      
	      /* plane-wave phase exp(i * theta) at this site */
	      theta = factx*(s->x)*px + facty*(s->y)*py + factz*(s->z)*pz; 
	      phase_fact = cmplx((Real) cos(theta)  , (Real) sin(theta)) ; 
	      
	      /* flat index: time + base + momentum block + operator block */
	      prop_pt = my_t + base_pt + q_pt * q_stride + i_gamma_corr * op_stride;
	      
	      /* trace over propagators */
	      for(si=0;si<4;si++)
		for(sf=0;sf<4;sf++)
		  for(cf=0;cf<3;cf++)
		    {
		      g1 = localmat2.d[si].d[sf].c[cf];
		      CMUL( quark.d[sf].d[si].c[cf] , phase_fact,  g2);    
		      
		      /* prop += g1 * g2, written out component-wise */
		      prop[prop_pt ].real += 
			(g1.real*g2.real - g1.imag*g2.imag); 
		      prop[prop_pt ].imag += 
			(g1.real*g2.imag + g1.imag*g2.real); 
		    }
	    }
	}
    }  /**** end of the loop over lattice sites ******/
Beispiel #23
0
/*
 * do measurements: load density, ploop, etc. and phases onto lattice
 *
 * Works on the sites of the last time slice (s->t == nt-1).  For each such
 * site it forms link[TUP] * ploop_t in s->tempmat1, accumulates its trace
 * (plp), adds C to the diagonal, takes the determinant and from it the
 * local density, the four per-site values stored in s->qprob[0..3] and the
 * phase.  The sums are reduced over all nodes, the complex ones are
 * divided by nx*ny*nz, and node 0 prints a "ZMES" and a "PMES" line.
 * With PPCORR defined, products of qprob between each site and its
 * neighbour in XUP, YUP and ZUP are accumulated and printed as "PP" lines.
 *
 * Side effects: overwrites s->tempmat1 and s->qprob[] on the last time
 * slice.  Reads the globals nt, nx, ny, nz, C and this_node.
 *
 * NOTE(review): dd, TC and np[] are declared but not used in the visible
 * code; dx, dy, dz, dir and tag are used only under PPCORR.
 */
void measure() {
   register int i,j,k, c;
   register site *s;
   int dx,dy,dz;	/* separation for correlated observables */
   int dir;		/* direction of separation */
   msg_tag *tag;
   register complex cc,dd;	/*scratch*/
   complex ztr, zcof, znum, zdet, TC, zd, density, zphase;
   complex p[4]; /* probabilities of n quarks at a site */
   complex np[4]; /* probabilities at neighbor site */
   complex pp[4][4]; /* joint probabilities of n here and m there */
   complex zplp, plp;
   Real locphase, phase;


   /* First make T (= timelike P-loop) from s->ploop_t 
      T stored in s->tempmat1
   */
   ploop_less_slice(nt-1,EVEN);
   ploop_less_slice(nt-1,ODD);

   /* clear all accumulators */
   phase = 0.;
   density = plp = cmplx(0.0, 0.0);
   for(j=0;j<4;j++){
	p[j]=cmplx(0.0,0.0);
	for(k=0;k<4;k++)pp[j][k]=cmplx(0.0,0.0);
   }
   FORALLSITES(i,s) {
      /* only the last time slice contributes */
      if(s->t != nt-1) continue;
      mult_su3_nn(&(s->link[TUP]), &(s->ploop_t), &(s->tempmat1));

      zplp = trace_su3(&(s->tempmat1));
      CSUM(plp, zplp);

      /* NOTE(review): same call on the same matrix as zplp above */
      ztr = trace_su3(&(s->tempmat1));
      CONJG(ztr, zcof);

      /* tempmat1 += C * identity, in place; must follow the traces above */
      for(c=0; c<3; ++c) s->tempmat1.e[c][c].real += C;
      zdet = det_su3(&(s->tempmat1));
      znum = numer(C, ztr, zcof);
      CDIV(znum, zdet, zd);
      CSUM(density, zd);

      /* store n_quark probabilities at this site in lattice variable
	qprob[], accumulate sum over lattice in p[] */
      /* numerators are C^3, C^2 * ztr, C * zcof and 1, each divided by zdet */
      cc = cmplx(C*C*C,0.0); CDIV(cc,zdet,s->qprob[0]); CSUM(p[0],s->qprob[0]);
      CMULREAL(ztr,C*C,cc); CDIV(cc,zdet,s->qprob[1]); CSUM(p[1],s->qprob[1]);
      CMULREAL(zcof,C,cc); CDIV(cc,zdet,s->qprob[2]); CSUM(p[2],s->qprob[2]);
      cc = cmplx(1.0,0.0); CDIV(cc,zdet,s->qprob[3]); CSUM(p[3],s->qprob[3]);

      /* phase of the local determinant, summed over sites */
      locphase = carg(&zdet);
      phase += locphase;

   }
   /* global sums over all nodes */
   g_floatsum( &phase );
   g_complexsum( &density );
   g_complexsum( &p[0] );
   g_complexsum( &p[1] );
   g_complexsum( &p[2] );
   g_complexsum( &p[3] );
   g_complexsum( &plp );
   /* normalise by the spatial volume; the summed phase is not divided */
   CDIVREAL(density,(Real)(nx*ny*nz),density);
   CDIVREAL(p[0],(Real)(nx*ny*nz),p[0]);
   CDIVREAL(p[1],(Real)(nx*ny*nz),p[1]);
   CDIVREAL(p[2],(Real)(nx*ny*nz),p[2]);
   CDIVREAL(p[3],(Real)(nx*ny*nz),p[3]);
   CDIVREAL(plp,(Real)(nx*ny*nz),plp);

   zphase = ce_itheta(phase);
   if(this_node == 0) {
      printf("ZMES\t%e\t%e\t%e\t%e\t%e\t%e\n", zphase.real, zphase.imag, 
	                               density.real, density.imag,
	                               plp.real, plp.imag);
      printf("PMES\t%e\t%e\t%e\t%e\t%e\t%e\t%e\t%e\n",
				p[0].real, p[0].imag, p[1].real, p[1].imag,
				p[2].real, p[2].imag, p[3].real, p[3].imag );
   }

#ifdef PPCORR
   dx=1; dy=0; dz=0;	/* Temporary - right now we just do nearest neighbor */
   /* NOTE(review): dx,dy,dz stay at (1,0,0) for all three directions and
      are only used in the printout below */
   for(dir=XUP;dir<=ZUP;dir++){
      /* fetch the neighbour's four qprob values in direction dir */
      tag = start_gather_site( F_OFFSET(qprob[0]), 4*sizeof(complex), dir,
	   EVENANDODD, gen_pt[0] );
      wait_gather(tag);
      FORALLSITES(i,s)if(s->t==nt-1){
        for(j=0;j<4;j++)for(k=0;k<4;k++){
	   CMUL( (s->qprob)[j],((complex *)gen_pt[0][i])[k],cc);
           CSUM(pp[j][k],cc);
        }
      }
      cleanup_gather(tag);
   }

   /* density correlation format:
	PP dx dy dz n1 n2 real imag */
   /* the factor 3 in the normalisation matches the three directions summed */
   for(j=0;j<4;j++)for(k=0;k<4;k++){
     g_complexsum( &pp[j][k] );
     CDIVREAL(pp[j][k],(Real)(3*nx*ny*nz),pp[j][k]);
     if(this_node==0)
       printf("PP %d %d %d   %d %d   %e   %e\n",dx,dy,dz,j,k,
	  pp[j][k].real,pp[j][k].imag);
   }
#endif /*PPCORR*/
}
Beispiel #24
0
/*
 * Fill the global Lambda[] array with the generators:
 *   - NCOL * (NCOL - 1) off-diagonal generators, stored as (+, -) pairs,
 *   - NCOL - 1 diagonal generators,
 *   - the U(1) generator in Lambda[DIMF - 1] when DIMF == NCOL * NCOL.
 * All DIMF matrices are cleared first.  The run is terminated if the number
 * of off-diagonal generators produced is not NCOL * (NCOL - 1).
 *
 * With DEBUG_CHECK defined the generators are dumped on node 0 and two
 * sums are evaluated, printing every result whose size exceeds IMAG_TOL.
 */
void setup_lambda() {
  int i, j, k, l, count;
  complex inv_sqrt = cmplx(1.0 / sqrt(2.0), 0.0);
  complex i_inv_sqrt = cmplx(0.0, 1.0 / sqrt(2.0));

#ifdef DEBUG_CHECK
  int a;
  // Named tr rather than trace: a local called trace would hide the
  // trace() routine called further down
  complex tr, tt;
  node0_printf("Computing generators for U(N)\n");
#endif

  // Make sure Lambda matrices are initialized
  for (i = 0; i < DIMF; i++)
    clear_mat(&(Lambda[i]));

  // N * (N - 1) off-diagonal SU(N) generators
  // (T^{ij, +})_{kl} = i * (de_{ki} de_{lj} + de_{kj} de_{li}) / sqrt(2)
  // (T^{ij, -})_{kl} = (de_{ki} de_{lj} - de_{kj} de_{li}) / sqrt(2)
  // Sign in second chosen to match previous values
  // Only elements (i, j) and (j, i) are non-zero, so set them directly
  count = 0;
  for (i = 0; i < NCOL; i++) {
    for (j = i + 1; j < NCOL; j++) {
      CSUM(Lambda[count].e[i][j], i_inv_sqrt);
      CSUM(Lambda[count + 1].e[i][j], inv_sqrt);

      CSUM(Lambda[count].e[j][i], i_inv_sqrt);
      CDIF(Lambda[count + 1].e[j][i], inv_sqrt);

      count += 2;
    }
  }
  if (count != NCOL * (NCOL - 1)) {
    node0_printf("ERROR: Wrong number of off-diagonal generators, ");
    node0_printf("%d vs. %d\n", count, NCOL * (NCOL - 1));
    terminate(1);
  }

  // N - 1 diagonal SU(N) generators
  // T^k = i * diag(1, 1, ..., -k, 0, ..., 0) / sqrt(k * (k + 1))
  for (i = 0; i < NCOL - 1; i++) {
    j = NCOL * (NCOL - 1) + i;    // Index after +/- above
    k = i + 1;
    i_inv_sqrt = cmplx(0.0, 1.0 / sqrt(k * (k + 1.0)));
    for (l = 0; l <= k; l++)
      Lambda[j].e[l][l] = i_inv_sqrt;
    // Last non-zero diagonal entry carries the factor -k
    CMULREAL(Lambda[j].e[k][k], -1.0 * k, Lambda[j].e[k][k]);
  }

  // U(1) generator i * I_N / sqrt(N)
  if (DIMF == NCOL * NCOL) {    // Allow SU(N) compilation for now
    i_inv_sqrt = cmplx(0.0, sqrt(one_ov_N));
    clear_mat(&(Lambda[DIMF - 1]));
    for (i = 0; i < NCOL; i++)
      Lambda[DIMF - 1].e[i][i] = i_inv_sqrt;
  }

#ifdef DEBUG_CHECK
  // Print Lambdas
  for (i = 0; i < DIMF; i++){
    node0_printf("Lambda[%d]\n",i);
    if (this_node == 0)
      dumpmat(&(Lambda[i]));
  }

  // Test group theory
  node0_printf("Check group theory ");
  node0_printf("Sum_a Lambda^a_{kl} Lambda^a_{ij} = -delta_kj delta_il\n");
  for (i = 0; i < NCOL; i++) {
    for (j = 0; j < NCOL; j++) {
      for (k = 0; k < NCOL; k++) {
        for (l = 0; l < NCOL; l++) {
          tr = cmplx(0, 0);
          for (a = 0; a < DIMF; a++) {
            CMUL(Lambda[a].e[k][l], Lambda[a].e[i][j], tt);
            CSUM(tr, tt);
          }
          if (cabs_sq(&tr) > IMAG_TOL)
            node0_printf("Sum_a La^a_{%d%d} La^a_{%d%d} = (%.4g, %.4g)\n",
                         k, j, i, l, tr.real, tr.imag);
        }
      }
    }
  }
#endif

  // Test orthogonality and compute products of Lambdas for fermion forces
#ifdef DEBUG_CHECK
  for (i = 0; i < DIMF; i++) {
    for (j = 0; j < DIMF; j++) {
      mult_nn(&(Lambda[i]), &(Lambda[j]), &tmat);
      tr = trace(&tmat);
      if (tr.real * tr.real > IMAG_TOL)
        node0_printf("Tr[T_%d T_%d] = (%.4g, %.4g)\n",
                     i, j, tr.real, tr.imag);
    }
  }
#endif
}
Beispiel #25
0
/*
 *  Computes meson 2pt functions for the channels:
 *  1-1, g5-g5, g5-g4g5, g4g5-g5, g4g5-g4g5, g1-g1, g2-g2, g3-g3
 *
 *  light, heavy: spinor-field arrays to contract (indexed as col + NC*spin
 *                below); passing heavy == NULL reuses light for both.
 *  max_q2:       every integer momentum (x, y, z) with
 *                x^2 + y^2 + z^2 <= max_q2 is computed.
 *  outfile:      name of the ascii file written by the master process.
 *
 *  The function does not return anything. It writes the correlation functions
 *  to a file (as ascii), one line per (t, momentum, channel):
 *    px py pz t Re Im tag
 *
 *  Updated for non-zero momentum correlator. Correlator calculated explicitly
 *  for all momentum vectors (i.e. non-FFT)
 *
 */
void
qpb_mesons_2pt_corr(qpb_spinor_field *light, qpb_spinor_field *heavy, int max_q2, char outfile[])
{
  /* A missing second propagator means both quarks use the same one */
  if(heavy == NULL)
    heavy = light;

  /* This should never happen. For now the package is built so that
     only x, y and z are parallelized across MPI and t along OpenMP */
  if(problem_params.par_dir[0] == 1)
    {
      error(" %s() not implemented for distributed t-direction, quiting\n", __func__);
      exit(QPB_NOT_IMPLEMENTED_ERROR);
    }
  
  int lvol = problem_params.l_vol;
  int lt = problem_params.l_dim[0];
  int lvol3d = lvol/lt;		/* sites per time-slice */
  qpb_complex **corr_x;		/* [t][lv]: correlator per site */
  qpb_complex **corr_k;		/* [t][p]:  output of qpb_ft() */
  qpb_complex **corr[QPB_N_MESON_2PT_CHANNELS];	/* [ich][p][t]: final result */
  int N = (NS*NS*NS*NS);	/* upper bound on index combinations per channel */
  qpb_complex prod[N];		/* coefficient of each retained combination */
  int ndirac = 0;		/* number of retained combinations */
  int mu[N],nu[N],ku[N],lu[N];	/* the four indices of each combination */
  qpb_complex gamma_5x[NS][NS];
  qpb_complex gamma_5y[NS][NS];
  qpb_complex gamma_5z[NS][NS];
  /* Components are scanned over [-nq, nq); nq exceeds sqrt(max_q2) */
  int nmom = 0, nq = (int)sqrt(max_q2)+1;
  int (*mom)[4];		/* mom[n] = {q^2, z, y, x} */
  /*
    Count momentum vectors <= max_q2
   */
  for(int z=-nq; z<nq; z++)
    for(int y=-nq; y<nq; y++)
      for(int x=-nq; x<nq; x++)
	{
	  double q2 = x*x+y*y+z*z;
	  if(q2 <= max_q2)
	    nmom++;
	}
  
  mom = qpb_alloc(sizeof(int)*4*nmom);
  nmom = 0;

  /*
    Store momentum vectors <= max_q2 (same scan as the counting pass
    above, so nmom ends at the same value)
   */
  for(int z=-nq; z<nq; z++)
    for(int y=-nq; y<nq; y++)
      for(int x=-nq; x<nq; x++)
	{
	  double q2 = x*x+y*y+z*z;
	  if(q2 <= max_q2)
	    {
	      mom[nmom][3] = x;
	      mom[nmom][2] = y;
	      mom[nmom][1] = z;
	      mom[nmom][0] = q2;
	      nmom++;
	    }
	}
  

  /*
    Sort in ascending q^2 value (selection sort: find the smallest
    remaining q^2 at index k, then swap entries i and k)
   */
  for(int i=0; i<nmom; i++)
    {
      int x = mom[i][0]; /* the q^2 value */
      int k = i;
      for(int j=i+1; j<nmom; j++)
	if(mom[j][0] < x)
	  {
	    k = j;
	    x = mom[j][0];
	  }
      int swap[] = {mom[k][0], mom[k][1], mom[k][2], mom[k][3]};
      for(int j=0; j<4; j++) mom[k][j] = mom[i][j];
      for(int j=0; j<4; j++) mom[i][j] = swap[j];
    }

  /* Work buffers, reused for every channel */
  corr_x = qpb_alloc(lt * sizeof(qpb_complex *));
  corr_k = qpb_alloc(lt * sizeof(qpb_complex *));
  for(int t=0; t<lt; t++)
    {
      corr_x[t] = qpb_alloc(lvol3d * sizeof(qpb_complex));
      corr_k[t] = qpb_alloc(nmom * sizeof(qpb_complex));
    }

  for(int ich=0; ich<QPB_N_MESON_2PT_CHANNELS; ich++)
    {
      
      corr[ich] = qpb_alloc(nmom * sizeof(qpb_complex *));
      for(int p=0; p<nmom; p++)
	corr[ich][p] = qpb_alloc(lt * sizeof(qpb_complex));

      /*
	Build the list of index combinations (i,j,k,l) that contribute
	to this channel, together with their coefficient prod[].
	Combinations whose coefficient fails the "> 0.5" test are
	dropped.
       */
      ndirac = 0;
      switch(ich)
	{
	case S_S:
	  /* coefficient: gamma_5[i][j] * gamma_5[k][l] */
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      for(int k=0; k<NS; k++)
		for(int l=0; l<NS; l++)
		  {
		    if(CNORM(CMUL(qpb_gamma_5[i][j],qpb_gamma_5[k][l])) > 0.5 )
		      {
			mu[ndirac] = i;
			nu[ndirac] = j;
			ku[ndirac] = k;
			lu[ndirac] = l;
			prod[ndirac] = CMUL(qpb_gamma_5[i][j],qpb_gamma_5[k][l]);
			ndirac++;
		      }
		  }
	  break;
	case G5_G5:
	  /* coefficient: 1 when i==j and k==l */
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      for(int k=0; k<NS; k++)
		for(int l=0; l<NS; l++)
		  {
		    if(i==j && k==l)
		      {
			mu[ndirac] = i;
			nu[ndirac] = j;
			ku[ndirac] = k;
			lu[ndirac] = l;
			prod[ndirac] = (qpb_complex){1.,0.};
			ndirac++;
		      }
		  }
	  break;
	case G5_G4G5:	
	  /* coefficient: gamma_t[k][l] when i==j */
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      for(int k=0; k<NS; k++)
		for(int l=0; l<NS; l++)
		  {
		    if(i==j && CNORM(qpb_gamma_t[k][l]) > 0.5)
		      {
			mu[ndirac] = i;
			nu[ndirac] = j;
			ku[ndirac] = k;
			lu[ndirac] = l;
			prod[ndirac] = qpb_gamma_t[k][l];
			ndirac++;
		      }
		  }
	  break;
	case G4G5_G5:
	  /* coefficient: gamma_t[i][j] when k==l */
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      for(int k=0; k<NS; k++)
		for(int l=0; l<NS; l++)
		  {
		    if(CNORM(qpb_gamma_t[i][j]) > 0.5 && k==l )
		      {
			mu[ndirac] = i;
			nu[ndirac] = j;
			ku[ndirac] = k;
			lu[ndirac] = l;
			prod[ndirac] = qpb_gamma_t[i][j];
			ndirac++;
		      }
		  }
	  break;
	case G4G5_G4G5:
	  /* coefficient: gamma_t[i][j] * gamma_t[k][l] */
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      for(int k=0; k<NS; k++)
		for(int l=0; l<NS; l++)
		  {
		    if(CNORM(CMUL(qpb_gamma_t[i][j],qpb_gamma_t[k][l])) > 0.5 )
		      {
			mu[ndirac] = i;
			nu[ndirac] = j;
			ku[ndirac] = k;
			lu[ndirac] = l;
			prod[ndirac] = CMUL(qpb_gamma_t[i][j],qpb_gamma_t[k][l]);
			ndirac++;
		      }
		  }
	  break;
	case G1_G1:
	  /* gamma_5x = gamma_5 * gamma_x (matrix product) */
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      {
		gamma_5x[i][j] = (qpb_complex){0., 0.};
		for(int k=0; k<NS; k++)
		  {
		    gamma_5x[i][j].re += 
		      CMULR(qpb_gamma_5[i][k], qpb_gamma_x[k][j]);
		    gamma_5x[i][j].im += 
		      CMULI(qpb_gamma_5[i][k], qpb_gamma_x[k][j]);
		  }
	      }
	  /* coefficient: -gamma_5x[i][j] * gamma_5x[k][l] */
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      for(int k=0; k<NS; k++)
		for(int l=0; l<NS; l++)
		  {
		    if(CNORM(CMUL(gamma_5x[i][j],gamma_5x[k][l])) > 0.5 )
		      {
			mu[ndirac] = i;
			nu[ndirac] = j;
			ku[ndirac] = k;
			lu[ndirac] = l;
			prod[ndirac] = CNEGATE(CMUL(gamma_5x[i][j],gamma_5x[k][l]));
			ndirac++;
		      }
		  }
	  break;
	case G2_G2:
	  /* gamma_5y = gamma_5 * gamma_y (matrix product) */
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      {
		gamma_5y[i][j] = (qpb_complex){0., 0.};
		for(int k=0; k<NS; k++)
		  {
		    gamma_5y[i][j].re += 
		      CMULR(qpb_gamma_5[i][k], qpb_gamma_y[k][j]);
		    gamma_5y[i][j].im += 
		      CMULI(qpb_gamma_5[i][k], qpb_gamma_y[k][j]);
		  }
	      }
	  /* coefficient: -gamma_5y[i][j] * gamma_5y[k][l] */
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      for(int k=0; k<NS; k++)
		for(int l=0; l<NS; l++)
		  {
		    if(CNORM(CMUL(gamma_5y[i][j],gamma_5y[k][l])) > 0.5 )
		      {
			mu[ndirac] = i;
			nu[ndirac] = j;
			ku[ndirac] = k;
			lu[ndirac] = l;
			prod[ndirac] = CNEGATE(CMUL(gamma_5y[i][j],gamma_5y[k][l]));
			ndirac++;
		      }
		  }
	  break;
	case G3_G3:
	  /* gamma_5z = gamma_5 * gamma_z (matrix product) */
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      {
		gamma_5z[i][j] = (qpb_complex){0., 0.};
		for(int k=0; k<NS; k++)
		  {
		    gamma_5z[i][j].re += 
		      CMULR(qpb_gamma_5[i][k], qpb_gamma_z[k][j]);
		    gamma_5z[i][j].im += 
		      CMULI(qpb_gamma_5[i][k], qpb_gamma_z[k][j]);
		  }
	      }
	  /* coefficient: -gamma_5z[i][j] * gamma_5z[k][l] */
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      for(int k=0; k<NS; k++)
		for(int l=0; l<NS; l++)
		  {
		    if(CNORM(CMUL(gamma_5z[i][j],gamma_5z[k][l])) > 0.5 )
		      {
			mu[ndirac] = i;
			nu[ndirac] = j;
			ku[ndirac] = k;
			lu[ndirac] = l;
			prod[ndirac] = CNEGATE(CMUL(gamma_5z[i][j],gamma_5z[k][l]));
			ndirac++;
		      }
		  }
	  break;
	}

      /* Reset the per-site accumulator for this channel */
      for(int t=0; t<lt; t++)
	for(int lv=0; lv<lvol3d; lv++)
	  corr_x[t][lv] = (qpb_complex){0., 0.};

      /*
	Accumulate, for every colour pair and every retained index
	combination, prod * (hp * conj(lp)) at each site.
       */
      for(int col0=0; col0<NC; col0++)
	for(int col1=0; col1<NC; col1++)
	  for(int id=0; id<ndirac; id++)
	    {
	      int i = mu[id];
	      int j = nu[id];
	      int k = ku[id];
	      int l = lu[id];
#ifdef OPENMP
#	pragma omp parallel for
#endif
	      for(int t=0; t<lt; t++)
		for(int lv=0; lv<lvol3d; lv++)
		  {
		    int v = blk_to_ext[lv + t*lvol3d];
		    /* note: hp is read from light[] and lp from heavy[] */
		    qpb_complex hp = ((qpb_complex *)(light[col0+NC*l].index[v]))[col1+NC*i];
		    qpb_complex lp = ((qpb_complex *)(heavy[col0+NC*k].index[v]))[col1+NC*j];
		    /* c = x * conj(y) */
		    qpb_complex c = {hp.re*lp.re + hp.im*lp.im, hp.im*lp.re - hp.re*lp.im};
		    corr_x[t][lv].re += CMULR(prod[id], c);
		    corr_x[t][lv].im += CMULI(prod[id], c);
		  }
	    }

      /* presumably projects corr_x onto the listed momenta, filling
	 corr_k[t][p] -- confirm against qpb_ft() */
      qpb_ft(corr_k, corr_x, lt, mom, nmom);
      /* Keep this channel's result, transposed to [p][t] */
      for(int t=0; t<lt; t++)
	for(int p=0; p<nmom; p++)
	  corr[ich][p][t] = corr_k[t][p];
      
    }
  
  /* Only the master process opens and writes the output file */
  FILE *fp = NULL;
  if(am_master)
    {
      if((fp = fopen(outfile, "w")) == NULL)
	{
	  error("%s: error opening file in \"w\" mode\n", outfile);
	  MPI_Abort(MPI_COMM_WORLD, QPB_FILE_ERROR);
	  exit(QPB_FILE_ERROR);
	}
    }
  /* Output order: t outermost, then momentum, then channel */
  for(int t=0; t<lt; t++)
    {
      char ctag[QPB_MAX_STRING];
      for(int p=0; p<nmom; p++)
	for(int ich=0; ich<QPB_N_MESON_2PT_CHANNELS; ich++)
	  {
	    /* Channel label printed at the end of each line */
	    switch(ich)
	      {
	      case S_S:
		strcpy(ctag ,"1-1");
		break;
	      case G5_G5:
		strcpy(ctag ,"g5-g5");
		break;
	      case G5_G4G5:
		strcpy(ctag ,"g5-g4g5");
		break;
	      case G4G5_G5:
		strcpy(ctag ,"g4g5-g5");
		break;
	      case G4G5_G4G5:
		strcpy(ctag ,"g4g5-g4g5");
		break;
	      case G1_G1:
		strcpy(ctag ,"g1-g1");
		break;
	      case G2_G2:
		strcpy(ctag ,"g2-g2");
		break;
	      case G3_G3:
		strcpy(ctag ,"g3-g3");
		break;
	      }
	    /* columns: x y z momentum components, t, Re, Im, tag */
	    if(am_master)
	      fprintf(fp, " %+2d %+2d %+2d %3d %+e %+e %s\n", 
		      mom[p][3], mom[p][2], mom[p][1], t, corr[ich][p][t].re, corr[ich][p][t].im, ctag);
	  }
    }
  if(am_master)
    fclose(fp);
  
  /* Release everything allocated above */
  for(int t=0; t<lt; t++)
    {
      free(corr_x[t]);
      free(corr_k[t]);
    }
  free(corr_x);
  free(corr_k);

  for(int ich=0; ich<QPB_N_MESON_2PT_CHANNELS; ich++)
    {
      for(int p=0; p<nmom; p++)
	free(corr[ich][p]);
      free(corr[ich]);
    }
  free(mom);
  return;
}
Beispiel #26
0
/* Determinant of the 3x3 complex matrix a->e, expanded as the signed
   sum of the six products e[0][c0]*e[1][c1]*e[2][c2].
   FIX THIS - more efficient to take cross product of first two
   rows, dot with third. */
complex det_su3( su3_matrix *a ) {
    /* Column taken from rows 0, 1 and 2 for each term.  Terms are
       accumulated in this order with alternating sign, starting at '+'. */
    static const int cols[6][3] = {
	{0, 1, 2}, {0, 2, 1},
	{1, 2, 0}, {1, 0, 2},
	{2, 0, 1}, {2, 1, 0}
    };
    complex pair, term, det;
    int n;

    /* The first term seeds the accumulator directly */
    CMUL(a->e[0][cols[0][0]], a->e[1][cols[0][1]], pair);
    CMUL(pair, a->e[2][cols[0][2]], det);

    for(n=1; n<6; n++){
	CMUL(a->e[0][cols[n][0]], a->e[1][cols[n][1]], pair);
	CMUL(pair, a->e[2][cols[n][2]], term);
	/* odd-numbered terms enter with a minus sign */
	if(n % 2){ CSUB(det, term, det); }
	else { CADD(det, term, det); }
    }
    return(det);
}
Beispiel #27
0
/* Accumulate a baryon correlator into prop[][t] from three propagator
   fields.  At every site, quarks 1 and 2 (src1, src2) are contracted
   into a "diquark" using the spin table chi_b and the colour table eps;
   the result is multiplied by one spin component of src3, and its real
   part, weighted by cosine momentum factors, is added to prop[0..4] at
   the site's time coordinate. */
void baryon_cont1(field_offset src1, field_offset src2, field_offset src3, 
		  int chi_b[4][4], int eps[3][3][3], Real prop[MAX_P][MAX_NT])
/* src1-3 are type wilson_propagator */
{

register int i;
register site *s;

int my_t;

int ci_1, ci_2, ci_3, si_1, si_2, si_3;
int cf_1, cf_2, cf_3, sf_1, sf_2, sf_3;
int  chi_i, chi_f, eps_f, eps_i,j;
Real factor;
complex diquark, diquark_temp;

/* for nonzero momentum */
Real cx,cy,cz,cxy,cyz,cxz,c111;

    FORALLSITES(i,s){

	my_t = s->t;

	/* Sum over source and sink colors of quark 3 */
	for(ci_3=0;ci_3<Nc;ci_3++)for(cf_3=0;cf_3<Nc;cf_3++){

	  diquark = cmplx(0.0,0.0);

	  /* Sum over source spins of quarks 1 and 2 */
	  /* They will form the "di_quark" */
	  for(si_1=0;si_1<Ns;si_1++)for(si_2=0;si_2<Ns;si_2++){

	    /* zero table entries contribute nothing and are skipped */
	    chi_i = chi_b[si_1][si_2];
	    if( chi_i != 0 ){

	      /* Sum over sink spins of quarks 1 and 2 */
	      for(sf_1=0;sf_1<Ns;sf_1++)for(sf_2=0;sf_2<Ns;sf_2++){

		chi_f = chi_b[sf_1][sf_2];
		if( chi_f != 0 ){

		  /* Sum over source colors of quarks 1 and 2 */
		  for(ci_1=0;ci_1<Nc;ci_1++)
		  if(ci_1 != ci_3) for(ci_2=0;ci_2<Nc;ci_2++){

		    eps_i = eps[ci_1][ci_2][ci_3];
		    if( eps_i != 0 ){

		      /* Sum over sink colors of quarks 1 and 2 */
		      for(cf_1=0;cf_1<Nc;cf_1++)
		      if(cf_1 != cf_3) for(cf_2=0;cf_2<Nc;cf_2++){

			eps_f = eps[cf_1][cf_2][cf_3];
			if( eps_f != 0 ){

			  /* weight = product of the four table entries */
			  factor = (Real)(eps_f*eps_i*chi_i*chi_f);
			  /* diquark_temp = src1 element * src2 element */
			  CMUL(
((wilson_propagator *)F_PT(s,src1))->c[cf_1].d[sf_1].d[si_1].c[ci_1],
((wilson_propagator *)F_PT(s,src2))->c[cf_2].d[sf_2].d[si_2].c[ci_2],
				diquark_temp);
			  diquark.real += factor*diquark_temp.real;
			  diquark.imag += factor*diquark_temp.imag;

			}  /* eps_f */
		      }  /* sum cf_1, cf_2 */
		    }  /* eps_i */
		  }  /* sum ci_1, ci_2 */
		}  /* chi_f */
	      }  /* sum sf_1, sf_2 */
	    }  /* chi_i */
	  } /* sum si_1, si_2 */

	  /* Sum over source and sink spin of uncontracted quark 3 */
	  /* Actually just use spin 1 */
	  si_3 = sf_3 = 1;

	    /* diquark_temp = diquark * src3 element (reuses the temporary) */
	    CMUL(diquark,
((wilson_propagator *)F_PT(s,src3))->c[cf_3].d[sf_3].d[si_3].c[ci_3],
		diquark_temp);


                /* prop[0..2]: j units of momentum, averaged over the
                   x, y and z directions (j = 0 gives weight 1) */
                for(j=0;j<3;j++){
                cz=cos(2.0*PI/(Real)nz*(Real)(s->z)*(Real)j);
                cx=cos(2.0*PI/(Real)nx*(Real)(s->x)*(Real)j);
                cy=cos(2.0*PI/(Real)ny*(Real)(s->y)*(Real)j);

 
                prop[j][my_t] += diquark_temp.real*(cx+cy+cz)/3.0;
                }
                /* prop[3]: average over the three two-axis sums;
                   prop[4]: sum of all three coordinates.
                   NOTE(review): every phase below divides by nz, which
                   matches the per-axis form above only when
                   nx == ny == nz -- confirm. */
                cxy=cos(2.0*PI/(Real)nz*(Real)(s->x +s->y));
                cxz=cos(2.0*PI/(Real)nz*(Real)(s->x +s->z));
                cyz=cos(2.0*PI/(Real)nz*(Real)(s->y +s->z));
                c111=cos(2.0*PI/(Real)nz*(Real)(s->x +s->y + s->z));
                prop[3][my_t] += diquark_temp.real*(cxy+cyz+cxz)/3.0;
                prop[4][my_t] += diquark_temp.real*c111;



	  /* }  */ /* sum sf_3, si_3 */
	}  /* sum cf_3, ci_3 */

    }  /* FORALLSITES */
Beispiel #28
0
/**
 * Do a complex FFT with the parameters defined in ff_fft_init(). The
 * input data must be permuted before with s->revtab table. No
 * 1.0/sqrt(n) normalization is done.
 *
 * The transform works in place on the (1 << s->nbits) entries of z:
 * two twiddle-free butterfly passes (distance 1, then distance 2)
 * followed by the general passes that read twiddles from s->exptab.
 *
 * Each pass is a counted loop that is skipped when it has no work to
 * do, so transforms with nbits < 2 never drive a pass counter below
 * zero and walk past the end of z.
 *
 * @param s  FFT context; nbits, inverse and exptab are read
 * @param z  data to transform, overwritten with the result
 */
void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
{
    const int ln = s->nbits;
    const int np = 1 << ln;
    const int np2 = np >> 1;
    int j, l;
    int nblocks, nloops;
    FFTComplex *p, *q;
    FFTComplex *exptab = s->exptab;
    FFTSample tmp_re, tmp_im;

    /* pass 0: butterflies on adjacent pairs */

    p = z;
    for (j = np >> 1; j > 0; j--) {
        BF(p[0].re, p[0].im, p[1].re, p[1].im,
           p[0].re, p[0].im, p[1].re, p[1].im);
        p += 2;
    }

    /* pass 1: butterflies at distance 2; the second one of each group
       rotates its right-hand input by +i (inverse) or -i (forward) */

    p = z;
    if (s->inverse) {
        for (j = np >> 2; j > 0; j--) {
            BF(p[0].re, p[0].im, p[2].re, p[2].im,
               p[0].re, p[0].im, p[2].re, p[2].im);
            BF(p[1].re, p[1].im, p[3].re, p[3].im,
               p[1].re, p[1].im, -p[3].im, p[3].re);
            p += 4;
        }
    } else {
        for (j = np >> 2; j > 0; j--) {
            BF(p[0].re, p[0].im, p[2].re, p[2].im,
               p[0].re, p[0].im, p[2].re, p[2].im);
            BF(p[1].re, p[1].im, p[3].re, p[3].im,
               p[1].re, p[1].im, p[3].im, -p[3].re);
            p += 4;
        }
    }

    /* pass 2 .. ln-1: the butterfly distance (nloops) doubles and the
       number of groups (nblocks) halves on every pass */

    nblocks = np >> 3;
    nloops = 1 << 2;
    while (nblocks != 0) {
        p = z;
        q = z + nloops;
        for (j = 0; j < nblocks; ++j) {
            /* first butterfly of the group needs no twiddle */
            BF(p->re, p->im, q->re, q->im,
               p->re, p->im, q->re, q->im);

            p++;
            q++;
            /* remaining butterflies step through exptab by nblocks */
            for (l = nblocks; l < np2; l += nblocks) {
                CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
                BF(p->re, p->im, q->re, q->im,
                   p->re, p->im, tmp_re, tmp_im);
                p++;
                q++;
            }

            /* skip over the half this group just consumed through q */
            p += nloops;
            q += nloops;
        }
        nblocks = nblocks >> 1;
        nloops = nloops << 1;
    }
}
/* Inverse 1-D Discrete Cosine Transform, in place on the 8 entries of Y.
   Result Y is scaled up by factor sqrt(8).
   Original Loeffler algorithm.

   The integer multipliers are the rotation constants scaled by 2^14,
   e.g. 23170 ~ sqrt(2) * 16384 (presumably CMUL scales the product
   back down -- confirm against the macro).
*/
static void idct_1d(int *Y)
{
	int s1[8];		/* stage 1 outputs */
	int s2[8];		/* stage 2 outputs */
	int r4, r5, r6, r7;	/* stage 3: rotated odd half */

	/* Stage 1: even half */
	but(Y[0], Y[4], s1[1], s1[0]);
	/* rot(sqrt(2), 6, Y[2], Y[6], &s1[2], &s1[3]); */
	s1[2] = SUB(CMUL(8867, Y[2]), CMUL(21407, Y[6]));
	s1[3] = ADD(CMUL(21407, Y[2]), CMUL(8867, Y[6]));

	/* Stage 1: odd half */
	but(Y[1], Y[7], s1[4], s1[7]);
	/* s1[5] = CMUL(sqrt(2), Y[3]);
	   s1[6] = CMUL(sqrt(2), Y[5]);
	 */
	s1[5] = CMUL(23170, Y[3]);
	s1[6] = CMUL(23170, Y[5]);

	/* Stage 2: */
	but(s1[0], s1[3], s2[3], s2[0]);
	but(s1[1], s1[2], s2[2], s2[1]);
	but(s1[4], s1[6], s2[6], s2[4]);
	but(s1[7], s1[5], s2[5], s2[7]);

	/* Stage 3: the even half s2[0..3] passes through unchanged,
	   only the odd half is rotated. */
	/* rot(1, 3, s2[4], s2[7], &r4, &r7); */
	r4 = SUB(CMUL(13623, s2[4]), CMUL(9102, s2[7]));
	r7 = ADD(CMUL(9102, s2[4]), CMUL(13623, s2[7]));
	/* rot(1, 1, s2[5], s2[6], &r5, &r6); */
	r5 = SUB(CMUL(16069, s2[5]), CMUL(3196, s2[6]));
	r6 = ADD(CMUL(3196, s2[5]), CMUL(16069, s2[6]));

	/* Final stage 4: combine even and odd halves back into Y */
	but(s2[0], r7, Y[7], Y[0]);
	but(s2[1], r6, Y[6], Y[1]);
	but(s2[2], r5, Y[5], Y[2]);
	but(s2[3], r4, Y[4], Y[3]);
}
Beispiel #30
0
/* Hadron wave functions. */
void wavefunc_t() {
register int i,j,n;
register site *s;
register complex cc;
msg_tag *tag;
Real finalrsq,scale,x;
int tmin,tmax,cgn,color;
/* for baryon code */
int ca,ca1,ca2,cb,cb1,cb2;
void symmetry_combine(field_offset src,field_offset space,int size,int dir);
void block_fourier(
 field_offset src,	/* src is field to be transformed */
 field_offset space,	/* space is working space, same size as src */
 int size,		/* Size of field in bytes.  The field must
			   consist of size/sizeof(complex) consecutive
			   complex numbers.  For example, an su3_vector
			   is 3 complex numbers. */
 int isign);		/* 1 for x -> k, -1 for k -> x */
void fourier(
field_offset src,	/* src is field to be transformed */
field_offset space,	/* space is working space, same size as src */
int size,		/* Size of field in bytes.  The field must
			   consist of size/sizeof(complex) consecutive
			   complex numbers.  For example, an su3_vector
			   is 3 complex numbers. */
int isign);		/* 1 for x -> k, -1 for k -> x */
void write_wf(field_offset src,char *string,int tmin,int tmax);

    /* Fix TUP Coulomb gauge - gauge links only*/
    rephase( OFF );
    gaugefix(TUP,(Real)1.8,500,(Real)GAUGE_FIX_TOL);
    rephase( ON );

    for(color=0;color<3;color++){ /* Make wall source */
        FORALLSITES(i,s){
	    for(j=0;j<3;j++)s->phi.c[j]=cmplx(0.0,0.0);
	    if( s->x%2==0 && s->y%2==0 && s->z%2==0 && s->t==0 ){
                s->phi.c[color] = cmplx(-1.0,0.0);
	    }
        }
        /* do a C.G. (source in phi, result in xxx) */
	load_ferm_links(&fn_links);
        cgn = ks_congrad(F_OFFSET(phi),F_OFFSET(xxx),mass,
			 niter, rsqprop, PRECISION, EVEN, &finalrsq, 
			 &fn_links);
        /* Multiply by -Madjoint, result in propmat[color] */
        dslash_site( F_OFFSET(xxx), F_OFFSET(propmat[color]), ODD, &fn_links);
        scalar_mult_latvec( F_OFFSET(xxx), (Real)(-2.0*mass),
	    F_OFFSET(propmat[color]), EVEN);
    }


    /* construct the diquark propagator--uses tempmat1 and do this before
you fft the quark propagator */

    FORALLSITES(i,s){
	for(ca=0;ca<3;ca++)for(cb=0;cb<3;cb++){
		ca1= (ca+1)%3; ca2= (ca+2)%3;
		cb1= (cb+1)%3; cb2= (cb+2)%3;
		CMUL((s->propmat[ca1].c[cb1]),(s->propmat[ca2].c[cb2]),
			(s->tempmat1.e[ca][cb]));

	CMUL((s->propmat[ca1].c[cb2]),(s->propmat[ca2].c[cb1]),
			cc);


		CSUB((s->tempmat1.e[ca][cb]),cc,(s->tempmat1.e[ca][cb]));
	}
    }
/* complex conjugate the diquark prop */
    FORALLSITES(i,s){
	for(ca=0;ca<3;ca++)for(cb=0;cb<3;cb++){
		CONJG((s->tempmat1.e[ca][cb]),(s->tempmat1.e[ca][cb]));
	}
    }
    /* Transform the diquark propagator.  */
   block_fourier( F_OFFSET(tempmat1), F_OFFSET(tempvec[0]),
	3*sizeof(su3_vector), FORWARDS);
/* complex conjugate the diquark prop. Now we have D(-k) for convolution */
    FORALLSITES(i,s){
	for(ca=0;ca<3;ca++)for(cb=0;cb<3;cb++){
		CONJG((s->tempmat1.e[ca][cb]),(s->tempmat1.e[ca][cb]));
	}
    }

    /* Transform the propagator.  */
    block_fourier( F_OFFSET(propmat[0]), F_OFFSET(tempvec[0]),
	3*sizeof(su3_vector), FORWARDS);

/* CODE SPECIFIC TO PARTICULAR PARTICLES */

/* MESON CODE */


    /* Square the result, component by component, sum over source and
	sink colors, result in ttt.c[0] */
    FORALLSITES(i,s){
	s->ttt.c[0].real = s->ttt.c[0].imag = 0.0;
	for(color=0;color<3;color++){
	    s->ttt.c[0].real += magsq_su3vec( &(s->propmat[color]) );
	}
    }