C++ (Cpp) trace_su3の例

コード例 #1

0

ファイルを表示

ファイル: density_half.c プロジェクト: erinaldi/milc_qcd

/* do measurements: load density, ploop, etc. and phases onto lattice */
void measure() {
   register int i,j,k, c, is_even;
   register site *s;
   int dx,dy,dz;	/* separation for correlated observables */
   int dir;		/* direction of separation */
   msg_tag *tag;
   register complex cc,dd;	/*scratch*/
   complex ztr, zcof, znum, zdet, TC, zd, density, zphase;
   complex p[4]; /* probabilities of n quarks at a site */
   complex np[4]; /* probabilities at neighbor site */
   complex pp[4][4]; /* joint probabilities of n here and m there */
   complex zplp, plp_even, plp_odd;
   Real locphase, phase;


   /* First make T (= timelike P-loop) from s->ploop_t 
      T stored in s->tempmat1
   */
   ploop_less_slice(nt-1,EVEN);
   ploop_less_slice(nt-1,ODD);

   phase = 0.;
   density = plp_even = plp_odd = cmplx(0.0, 0.0);
   for(j=0;j<4;j++){
	p[j]=cmplx(0.0,0.0);
	for(k=0;k<4;k++)pp[j][k]=cmplx(0.0,0.0);
   }
   FORALLSITES(i,s) {
      if(s->t != nt-1) continue;
      if( ((s->x+s->y+s->z)&0x1)==0 ) is_even=1; else is_even=0;
      mult_su3_nn(&(s->link[TUP]), &(s->ploop_t), &(s->tempmat1));

      zplp = trace_su3(&(s->tempmat1));
      if( is_even){CSUM(plp_even, zplp)}
      else        {CSUM(plp_odd, zplp)}

      ztr = trace_su3(&(s->tempmat1));
      CONJG(ztr, zcof);

      if(is_even){
        for(c=0; c<3; ++c) s->tempmat1.e[c][c].real += C;
        zdet = det_su3(&(s->tempmat1));
        znum = numer(C, ztr, zcof);
        CDIV(znum, zdet, zd);
        CSUM(density, zd);

        /* store n_quark probabilities at this site in lattice variable
	  qprob[], accumulate sum over lattice in p[] */
        cc= cmplx(C*C*C,0.0); CDIV(cc,zdet,s->qprob[0]); CSUM(p[0],s->qprob[0]);
        CMULREAL(ztr,C*C,cc); CDIV(cc,zdet,s->qprob[1]); CSUM(p[1],s->qprob[1]);
        CMULREAL(zcof,C,cc); CDIV(cc,zdet,s->qprob[2]); CSUM(p[2],s->qprob[2]);
        cc = cmplx(1.0,0.0); CDIV(cc,zdet,s->qprob[3]); CSUM(p[3],s->qprob[3]);
  
        locphase = carg(&zdet);
        phase += locphase;
      }

   }

コード例 #2

0

ファイルを表示

ファイル: field_strength.c プロジェクト: andypea/MILC

    /* Make traceless */
    FORALLSITES(i,s){
      cc = trace_su3(&FIELD_STRENGTH(component));
      CMULREAL(cc,0.33333333333333333,cc);
      for(j=0;j<3;j++)
	CSUB(FIELD_STRENGTH(component).e[j][j],cc,
	     FIELD_STRENGTH(component).e[j][j]);
    }

コード例 #3

0

ファイルを表示

ファイル: gauge_stuff.c プロジェクト: erinaldi/milc_qcd

double imp_gauge_action() {
    register int i;
    int rep;
    register site *s;
    complex trace;
    double g_action;
    double action,act2,total_action;
    su3_matrix *tempmat1;
    int length;

    /* these are for loop_table  */
    int ln,iloop;

    g_action=0.0;

    tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix));
    if(tempmat1 == NULL){
      printf("imp_gauge_action: Can't malloc temporary\n");
      terminate(1);
  }

    /* gauge action */
    for(iloop=0;iloop<NLOOP;iloop++){
	length=loop_length[iloop];
	/* loop over rotations and reflections */
	for(ln=0;ln<loop_num[iloop];ln++){

	    path_product( loop_table[iloop][ln] , length, tempmat1 );

	    FORALLSITES(i,s){
		trace=trace_su3( &tempmat1[i] );
		action =  3.0 - (double)trace.real;
		/* need the "3 -" for higher characters */
        	total_action= (double)loop_coeff[iloop][0]*action;
        	act2=action;
		for(rep=1;rep<NREPS;rep++){
		    act2 *= action;
		    total_action += (double)loop_coeff[iloop][rep]*act2;
		}

        	g_action  += total_action;

	    } END_LOOP /* sites */
	} /* ln */
    } /* iloop */

コード例 #4

0

ファイルを表示

ファイル: stout_smear_utilities.c プロジェクト: andypea/MILC

/* a = traceless-hermitian part of b.  b and a may be equivalent. */
static void
traceless_hermitian_su3(su3_matrix *a, su3_matrix *b)
{
  complex t;
  su3_matrix c;
  
  su3_adjoint( b, &c );
  add_su3_matrix( &c, b, a );
  
  t = trace_su3( a );
  
  CDIVREAL(t, 3., t);
  CSUB(a->e[0][0],t,a->e[0][0]);
  CSUB(a->e[1][1],t,a->e[1][1]);
  CSUB(a->e[2][2],t,a->e[2][2]);
  
  scalar_mult_su3_matrix( a, 0.5, a );
}

コード例 #5

0

ファイルを表示

ファイル: gauge_stuff.c プロジェクト: MistaGiggles/CompilerOptimisation-Coursework-1

double imp_gauge_action() {
    register int i;
    int rep;
    register site *s;
    complex trace;
    double g_action;
    double action,act2,total_action;
    int length;

    /* these are for loop_table  */
    int ln,iloop;

    g_action=0.0;

    /* gauge action */
    for(iloop=0;iloop<NLOOP;iloop++){
	length=loop_length[iloop];
	/* loop over rotations and reflections */
	for(ln=0;ln<loop_num[iloop];ln++){

	    path_product( loop_table[iloop][ln] , length );

	    FORALLSITES(i,s){
		trace=trace_su3( &s->tempmat1 );
		action =  3.0 - (double)trace.real;
		/* need the "3 -" for higher characters */
        	total_action= (double)loop_coeff[iloop][0]*action;
        	act2=action;
		for(rep=1;rep<NREPS;rep++){
		    act2 *= action;
		    total_action += (double)loop_coeff[iloop][rep]*act2;
		}

        	g_action  += total_action;

	    } END_LOOP /* sites */
	} /* ln */
    } /* iloop */

コード例 #6

0

ファイルを表示

ファイル: stout_smear.c プロジェクト: erinaldi/milc_qcd

static void 
get_Q_from_VUadj(su3_matrix *Q, su3_matrix *V, su3_matrix *U){

  complex x;
  complex tr;
  su3_matrix Om;

  x = cmplx(0, 0.5);  /* i/2 */

  /* Om = V U^adj */
  mult_su3_na( V, U, &Om );

  /* Q = i/2(Om^adj - Om) */
  su3_adjoint( &Om, Q );
  sub_su3_matrix( Q, &Om, &Om );
  c_scalar_mult_su3mat( &Om, &x, Q );
  /* Q = Q - Tr Q/3 */
  tr = trace_su3( Q );
  CDIVREAL(tr, 3., tr);
  CSUB(Q->e[0][0],tr,Q->e[0][0]);
  CSUB(Q->e[1][1],tr,Q->e[1][1]);
  CSUB(Q->e[2][2],tr,Q->e[2][2]);
}

コード例 #7

0

ファイルを表示

ファイル: gluon_prop_imp.c プロジェクト: erinaldi/milc_qcd

void gluon_prop( void ) {
register int i,dir;
register int pmu;
register site *s;
anti_hermitmat ahtmp;
Real pix, piy, piz, pit;
Real sin_pmu, sin_pmu2, prop_s, prop_l, ftmp1, ftmp2;
complex ctmp;
su3_matrix mat;
struct {
  Real f1, f2;
} msg;
double trace, dmuAmu;
int px, py, pz, pt;
int currentnode,newnode;

    pix = PI / (Real)nx;
    piy = PI / (Real)ny;
    piz = PI / (Real)nz;
    pit = PI / (Real)nt;

    trace = 0.0;
    /* Make A_mu as anti-hermition traceless part of U_mu */
    /* But store as SU(3) matrix for call to FFT */
    for(dir=XUP; dir<=TUP; dir++)
    {
	FORALLSITES(i,s){
	    trace += (double)(trace_su3( &(s->link[dir]))).real;
	    make_anti_hermitian( &(s->link[dir]), &ahtmp);
	    uncompress_anti_hermitian( &ahtmp, &(s->a_mu[dir]));
	}

	g_sync();
	/* Now Fourier transform */
	restrict_fourier_site(F_OFFSET(a_mu[dir]),
			      sizeof(su3_matrix), FORWARDS);
    }

コード例 #8

0

ファイルを表示

ファイル: density.c プロジェクト: erinaldi/milc_qcd

/* do measurements: load density, ploop, etc. and phases onto lattice */
void measure() {
   register int i,j,k, c;
   register site *s;
   int dx,dy,dz;	/* separation for correlated observables */
   int dir;		/* direction of separation */
   msg_tag *tag;
   register complex cc,dd;	/*scratch*/
   complex ztr, zcof, znum, zdet, TC, zd, density, zphase;
   complex p[4]; /* probabilities of n quarks at a site */
   complex np[4]; /* probabilities at neighbor site */
   complex pp[4][4]; /* joint probabilities of n here and m there */
   complex zplp, plp;
   Real locphase, phase;


   /* First make T (= timelike P-loop) from s->ploop_t 
      T stored in s->tempmat1
   */
   ploop_less_slice(nt-1,EVEN);
   ploop_less_slice(nt-1,ODD);

   phase = 0.;
   density = plp = cmplx(0.0, 0.0);
   for(j=0;j<4;j++){
	p[j]=cmplx(0.0,0.0);
	for(k=0;k<4;k++)pp[j][k]=cmplx(0.0,0.0);
   }
   FORALLSITES(i,s) {
      if(s->t != nt-1) continue;
      mult_su3_nn(&(s->link[TUP]), &(s->ploop_t), &(s->tempmat1));

      zplp = trace_su3(&(s->tempmat1));
      CSUM(plp, zplp);

      ztr = trace_su3(&(s->tempmat1));
      CONJG(ztr, zcof);

      for(c=0; c<3; ++c) s->tempmat1.e[c][c].real += C;
      zdet = det_su3(&(s->tempmat1));
      znum = numer(C, ztr, zcof);
      CDIV(znum, zdet, zd);
      CSUM(density, zd);

      /* store n_quark probabilities at this site in lattice variable
	qprob[], accumulate sum over lattice in p[] */
      cc = cmplx(C*C*C,0.0); CDIV(cc,zdet,s->qprob[0]); CSUM(p[0],s->qprob[0]);
      CMULREAL(ztr,C*C,cc); CDIV(cc,zdet,s->qprob[1]); CSUM(p[1],s->qprob[1]);
      CMULREAL(zcof,C,cc); CDIV(cc,zdet,s->qprob[2]); CSUM(p[2],s->qprob[2]);
      cc = cmplx(1.0,0.0); CDIV(cc,zdet,s->qprob[3]); CSUM(p[3],s->qprob[3]);

      locphase = carg(&zdet);
      phase += locphase;

   }
   g_floatsum( &phase );
   g_complexsum( &density );
   g_complexsum( &p[0] );
   g_complexsum( &p[1] );
   g_complexsum( &p[2] );
   g_complexsum( &p[3] );
   g_complexsum( &plp );
   CDIVREAL(density,(Real)(nx*ny*nz),density);
   CDIVREAL(p[0],(Real)(nx*ny*nz),p[0]);
   CDIVREAL(p[1],(Real)(nx*ny*nz),p[1]);
   CDIVREAL(p[2],(Real)(nx*ny*nz),p[2]);
   CDIVREAL(p[3],(Real)(nx*ny*nz),p[3]);
   CDIVREAL(plp,(Real)(nx*ny*nz),plp);

   zphase = ce_itheta(phase);
   if(this_node == 0) {
      printf("ZMES\t%e\t%e\t%e\t%e\t%e\t%e\n", zphase.real, zphase.imag, 
	                               density.real, density.imag,
	                               plp.real, plp.imag);
      printf("PMES\t%e\t%e\t%e\t%e\t%e\t%e\t%e\t%e\n",
				p[0].real, p[0].imag, p[1].real, p[1].imag,
				p[2].real, p[2].imag, p[3].real, p[3].imag );
   }

#ifdef PPCORR
   dx=1; dy=0; dz=0;	/* Temporary - right now we just do nearest neighbor */
   for(dir=XUP;dir<=ZUP;dir++){
      tag = start_gather_site( F_OFFSET(qprob[0]), 4*sizeof(complex), dir,
	   EVENANDODD, gen_pt[0] );
      wait_gather(tag);
      FORALLSITES(i,s)if(s->t==nt-1){
        for(j=0;j<4;j++)for(k=0;k<4;k++){
	   CMUL( (s->qprob)[j],((complex *)gen_pt[0][i])[k],cc);
           CSUM(pp[j][k],cc);
        }
      }
      cleanup_gather(tag);
   }

   /* density correlation format:
	PP dx dy dz n1 n2 real imag */
   for(j=0;j<4;j++)for(k=0;k<4;k++){
     g_complexsum( &pp[j][k] );
     CDIVREAL(pp[j][k],(Real)(3*nx*ny*nz),pp[j][k]);
     if(this_node==0)
       printf("PP %d %d %d   %d %d   %e   %e\n",dx,dy,dz,j,k,
	  pp[j][k].real,pp[j][k].imag);
   }
#endif /*PPCORR*/
}

コード例 #9

0

ファイルを表示

ファイル: stout_smear.c プロジェクト: erinaldi/milc_qcd

/* Get the coefficients f of the expansion of exp(iQ)                   */
static void 
get_fs_from_Qs( complex f[3], su3_matrix *Q, su3_matrix *QQ){

  su3_matrix QQQ;
  Real trQQQ, trQQ, c0, c1;
  Real c0abs, c0max, theta;
  Real eps, sqtwo = sqrt(2.);
  Real u, w, u_sq, w_sq, xi0;
  Real cosu, sinu, cosw, sin2u, cos2u, ucosu, usinu, ucos2u, usin2u;
  Real denom, subexp1, subexp2, subexp3, subexp;

  mult_su3_nn ( Q, QQ, &QQQ );

  trQQ  = (trace_su3( QQ )).real;
  trQQQ = (trace_su3( &QQQ )).real;

  c0 = 1./3. * trQQQ;
  c1 = 1./2. * trQQ;

  if( c1 < 4.0e-3  )
    { // RGE: set to 4.0e-3 (CM uses this value). I ran into nans with 1.0e-4
      // =======================================================================
      //
      // Corner Case 1: if c1 < 1.0e-4 this implies c0max ~ 3x10^-7
      //    and in this case the division c0/c0max in arccos c0/c0max can be undefined
      //    and produce NaN's

      // In this case what we can do is get the f-s a different way. We go back to basics:
      //
      // We solve (using maple) the matrix equations using the eigenvalues
      //
      //  [ 1, q_1, q_1^2 ] [ f_0 ]       [ exp( iq_1 ) ]
      //  [ 1, q_2, q_2^2 ] [ f_1 ]   =   [ exp( iq_2 ) ]
      //  [ 1, q_3, q_3^2 ] [ f_2 ]       [ exp( iq_3 ) ]
      //
      // with q_1 = 2 u w, q_2 = -u + w, q_3 = - u - w
      //
      // with u and w defined as  u = sqrt( c_1/ 3 ) cos (theta/3)
      //                     and  w = sqrt( c_1 ) sin (theta/3)
      //                          theta = arccos ( c0 / c0max )
      // leaving c0max as a symbol.
      //
      //  we then expand the resulting f_i as a series around c0 = 0 and c1 = 0
      //  and then substitute in c0max = 2 ( c_1/ 3)^(3/2)
      //
      //  we then convert the results to polynomials and take the real and imaginary parts:
      //  we get at the end of the day (to low order)

      //                  1    2
      //   f0[re] := 1 - --- c0  + h.o.t
      //                 720     
      //
      //               1       1           1        2
      //   f0[im] := - - c0 + --- c0 c1 - ---- c0 c1   + h.o.t
      //               6      120         5040       
      //
      //
      //             1        1            1        2
      //   f1[re] := -- c0 - --- c0 c1 + ----- c0 c1  +  h.o.t
      //             24      360         13440        f
      //
      //                 1       1    2    1     3    1     2
      //   f1[im] := 1 - - c1 + --- c1  - ---- c1  - ---- c0   + h.o.t
      //                 6      120       5040       5040
      //
      //               1   1        1    2     1     3     1     2
      //   f2[re] := - - + -- c1 - --- c1  + ----- c1  + ----- c0  + h.o.t
      //               2   24      720       40320       40320   
      //
      //              1        1              1        2
      //   f2[im] := --- c0 - ---- c0 c1 + ------ c0 c1  + h.o.t
      //             120      2520         120960

      //  We then express these using Horner's rule for more stable evaluation.
      //
      //  =====================================================================

      f[0].real = 1. - c0*c0/720.;
      f[0].imag = -(c0/6.)*(1. - (c1/20.)*(1. - (c1/42.))) ;

      f[1].real =  c0/24.*(1. - c1/15.*(1. - 3.*c1/112.)) ;
      f[1].imag =  1.-c1/6.*(1. - c1/20.*(1. - c1/42.)) - c0*c0/5040. ;

      f[2].real = 0.5*(-1. + c1/12.*(1. - c1/30.*(1. - c1/56.)) + c0*c0/20160.);
      f[2].imag = 0.5*(c0/60.*(1. - c1/21.*(1. - c1/48.)));

    }
  else
    {
      // =======================================================================
      // Normal case: Do as per Morningstar-Peardon paper
      // =======================================================================

      c0abs = fabs( c0 );
      c0max = 2*pow( c1/3., 1.5);

      // =======================================================================
      // Now work out theta. In the paper the case where c0 -> c0max even when c1 is reasonable
      // Has never been considered, even though it can arise and can cause the arccos function
      // to fail
      // Here we handle it with series expansion
      // =======================================================================
      eps = (c0max - c0abs)/c0max;

      if( eps < 0 ) {
        // =====================================================================
        // Corner Case 2: Handle case when c0abs is bigger than c0max.
        // This can happen only when there is a rounding error in the ratio, and that the
        // ratio is really 1. This implies theta = 0 which we'll just set.
        // =====================================================================
        theta = 0;
      }
      else if ( eps < 1.0e-3 ) {
        // =====================================================================
        // Corner Case 3: c0->c0max even though c1 may be actually quite reasonable.
        // The ratio |c0|/c0max -> 1 but is still less than one, so that a
        // series expansion is possible.
        // SERIES of acos(1-epsilon): Good to O(eps^6) or with this cutoff to O(10^{-18}) Computed with Maple.
        //  BTW: 1-epsilon = 1 - (c0max-c0abs)/c0max = 1-(1 - c0abs/c0max) = +c0abs/c0max
        //
        // ======================================================================
        theta = sqtwo*sqrt(eps)*( 1 + ( (1./12.) + ( (3./160.) + ( (5./896.) + ( (35./18432.) + (63./90112.)*eps ) *eps) *eps) *eps) *eps);
      }
      else {
        theta = acos( c0abs/c0max );
      }

      u = sqrt(c1/3.)*cos(theta/3.);
      w = sqrt(c1)*sin(theta/3.);

      u_sq = u*u;
      w_sq = w*w;

      if( fabs(w) < 0.05 ) {
        xi0 = 1. - (1./6.)*w_sq*( 1. - (1./20.)*w_sq*( 1. - (1./42.)*w_sq ) );
      }
      else {
        xi0 = sin(w)/w;
      }

      cosu = cos(u);
      sinu = sin(u);
      cosw = cos(w);
      sin2u = sin(2*u);
      cos2u = cos(2*u);
      ucosu = u*cosu;
      usinu = u*sinu;
      ucos2u = u*cos2u;
      usin2u = u*sin2u;

      denom = 9.*u_sq - w_sq;

      subexp1 = u_sq - w_sq;
      subexp2 = 8*u_sq*cosw;
      subexp3 = (3*u_sq + w_sq)*xi0;

      f[0].real = ( (subexp1)*cos2u + cosu*subexp2 + 2*usinu*subexp3 ) / denom ;
      f[0].imag = ( (subexp1)*sin2u - sinu*subexp2 + 2*ucosu*subexp3 ) / denom ;

      subexp = (3*u_sq -w_sq)*xi0;

      f[1].real = (2*(ucos2u - ucosu*cosw)+subexp*sinu)/denom;
      f[1].imag = (2*(usin2u + usinu*cosw)+subexp*cosu)/denom;

      subexp=3*xi0;

      f[2].real = (cos2u - cosu*cosw -usinu*subexp) /denom ;
      f[2].imag = (sin2u + sinu*cosw -ucosu*subexp) /denom ;

      if( c0 < 0 ) {

        // f[0] = conj(f[0]);
        f[0].imag *= -1;
       
        //f[1] = -conj(f[1]);
        f[1].real *= -1;
       
        //f[2] = conj(f[2]);
        f[2].imag *= -1;

      }
    } // End of if( corner_caseP ) else {}
}

コード例 #10

0

ファイルを表示

ファイル: twopt.c プロジェクト: erinaldi/milc_qcd

static complex  
KS_2pt_trace(su3_matrix * antiquark, wilson_propagator * quark, 
		      int * g_snk, int n_snk, int *g_src, int n_src, int *p, site *s)
{
  int t;
  int my_x;
  int my_y;
  int my_z;
  
  complex trace;
  int s0;
  int c0,c1,i;

  wilson_propagator temp,temp1;
  su3_matrix mat, mat1;
  

  t = s->t;
  my_x = s->x;
  my_y = s->y;
  my_z = s->z;
  
  temp = *quark;

  //multiply by gamma_snk

   for(i=0;i<n_snk;i++)
     for(c0=0;c0<3;c0++){
      mult_swv_by_gamma_l( &(temp.c[c0]), &(temp1.c[c0]), g_snk[i]);
      temp.c[c0] = temp1.c[c0]; 
    } 
   
   //multiply by Omega field
   if((t % 2) == 1)
     for(c0=0;c0<3;c0++){
       mult_swv_by_gamma_l( &(temp.c[c0]), &(temp1.c[c0]), TUP);
       temp.c[c0] = temp1.c[c0]; 
     }
    
   if((my_x % 2) == 1)
     for(c0=0;c0<3;c0++){
       mult_swv_by_gamma_l( &(temp.c[c0]), &(temp1.c[c0]), XUP); 
       temp.c[c0] = temp1.c[c0]; 
     }
   
   if((my_y % 2) == 1)
     for(c0=0;c0<3;c0++){
       mult_swv_by_gamma_l( &(temp.c[c0]), &(temp1.c[c0]), YUP); 
       temp.c[c0] = temp1.c[c0]; 
     }
   
   if((my_z % 2) == 1)
     for(c0=0;c0<3;c0++){
       mult_swv_by_gamma_l( &(temp.c[c0]), &(temp1.c[c0]), ZUP);  
       temp.c[c0] = temp1.c[c0]; 
     } 
 
   //mulptiply by gamma_src
   for(c0=0;c0<3;c0++)
     for(i=0;i<n_src;i++)
       {
	 mult_swv_by_gamma_l( &(temp.c[c0]), &(temp1.c[c0]), g_src[i]);   
	 temp.c[c0] = temp1.c[c0];
       }
   
   for(c0=0;c0<3;c0++) 
     for(c1=0;c1<3;c1++){
       trace.real = 0.0;
       trace.imag = 0.0;
       
       for(s0=0;s0<4;s0++){
	 trace.real += temp.c[c0].d[s0].d[s0].c[c1].real;
	 trace.imag += temp.c[c0].d[s0].d[s0].c[c1].imag;
       }
       
       mat.e[c0][c1].real = trace.real;
       mat.e[c0][c1].imag = trace.imag;
       
     }
   
   mult_su3_na(&mat, antiquark, &mat1); //antiquark is just the staggered prop su3 matrix
   
   trace = trace_su3(&mat1);

   return(trace);
}

コード例 #11

0

ファイルを表示

ファイル: stout_smear_utilities.c プロジェクト: andypea/MILC

static void 
get_fs_and_bs_from_Qs( complex f[3], complex b1[3], complex b2[3], 
		       su3_matrix *Q, su3_matrix *QQ, int do_bs )
{

  su3_matrix QQQ;
  Real trQQQ, trQQ, c0, c1;
  Real c0abs, c0max, theta;
  Real eps, sqtwo = sqrt(2.);
  Real u, w, u_sq, w_sq, xi0, xi1;
  Real cosu, sinu, cosw, sinw, sin2u, cos2u, ucosu, usinu, ucos2u, usin2u;
  Real denom;

  Real r_1_re[3], r_1_im[3], r_2_re[3], r_2_im[3];
  Real b_denom;

  mult_su3_nn ( Q, QQ, &QQQ );

  trQQ  = (trace_su3( QQ )).real;
  trQQQ = (trace_su3( &QQQ )).real;

  c0 = 1./3. * trQQQ;
  c1 = 1./2. * trQQ;
  
  if( c1 < 4.0e-3  ) 
    { // RGE: set to 4.0e-3 (CM uses this value). I ran into nans with 1.0e-4
      // =======================================================================
      // 
      // Corner Case 1: if c1 < 1.0e-4 this implies c0max ~ 3x10^-7
      //    and in this case the division c0/c0max in arccos c0/c0max can be undefined
      //    and produce NaN's
      
      // In this case what we can do is get the f-s a different way. We go back to basics:
      //
      // We solve (using maple) the matrix equations using the eigenvalues 
      //
      //  [ 1, q_1, q_1^2 ] [ f_0 ]       [ exp( iq_1 ) ]
      //  [ 1, q_2, q_2^2 ] [ f_1 ]   =   [ exp( iq_2 ) ]
      //  [ 1, q_3, q_3^2 ] [ f_2 ]       [ exp( iq_3 ) ]
      //
      // with q_1 = 2 u w, q_2 = -u + w, q_3 = - u - w
      // 
      // with u and w defined as  u = sqrt( c_1/ 3 ) cos (theta/3)
      //                     and  w = sqrt( c_1 ) sin (theta/3)
      //                          theta = arccos ( c0 / c0max )
      // leaving c0max as a symbol.
      //
      //  we then expand the resulting f_i as a series around c0 = 0 and c1 = 0
      //  and then substitute in c0max = 2 ( c_1/ 3)^(3/2)
      //  
      //  we then convert the results to polynomials and take the real and imaginary parts:
      //  we get at the end of the day (to low order)
      
      //                  1    2 
      //   f0[re] := 1 - --- c0  + h.o.t
      //                 720     
      //
      //               1       1           1        2 
      //   f0[im] := - - c0 + --- c0 c1 - ---- c0 c1   + h.o.t
      //               6      120         5040        
      //
      //
      //              1        1            1        2 
      //   f1[re] := -- c0 - --- c0 c1 + ----- c0 c1  +  h.o.t
      //             24      360         13440        f
      //
      //                 1       1    2    1     3    1     2
      //   f1[im] := 1 - - c1 + --- c1  - ---- c1  - ---- c0   + h.o.t
      //                 6      120       5040       5040
      //
      //               1   1        1    2     1     3     1     2
      //   f2[re] := - - + -- c1 - --- c1  + ----- c1  + ----- c0  + h.o.t
      //               2   24      720       40320       40320    
      //
      //              1        1              1        2
      //   f2[im] := --- c0 - ---- c0 c1 + ------ c0 c1  + h.o.t
      //             120      2520         120960
      
      //  We then express these using Horner's rule for more stable evaluation.
      // 
      //  to get the b-s we use the fact that
      //                                      b2_i = d f_i / d c0
      //                                 and  b1_i = d f_i / d c1
      //
      //  where the derivatives are partial derivatives
      //
      //  And we just differentiate the polynomials above (keeping the same level
      //  of truncation) and reexpress that as Horner's rule
      // 
      //  This clearly also handles the case of a unit gauge as no c1, u etc appears in the 
      //  denominator and the arccos is never taken. In this case, we have the results in 
      //  the raw c0, c1 form and we don't need to flip signs and take complex conjugates.
      //
      //  (not CD) I checked the expressions below by taking the difference between the Horner forms
      //  below from the expanded forms (and their derivatives) above and checking for the
      //  differences to be zero. At this point in time maple seems happy.
      //  ==================================================================
          
      f[0].real = 1. - c0*c0/720.;
      f[0].imag = -(c0/6.)*(1. - (c1/20.)*(1. - (c1/42.))) ;
      
      f[1].real =  c0/24.*(1. - c1/15.*(1. - 3.*c1/112.)) ;
      f[1].imag =  1.-c1/6.*(1. - c1/20.*(1. - c1/42.)) - c0*c0/5040. ;
      
      f[2].real = 0.5*(-1. + c1/12.*(1. - c1/30.*(1. - c1/56.)) + c0*c0/20160.);
      f[2].imag = 0.5*(c0/60.*(1. - c1/21.*(1. - c1/48.)));
      
      if( do_bs ) {
	//  partial f0/ partial c0
	b2[0].real = -c0/360.;
	b2[0].imag =  -(1./6.)*(1.-(c1/20.)*(1.-c1/42.));
        
	// partial f0 / partial c1
	//
	b1[0].real = 0;
	b1[0].imag = (c0/120.)*(1.-c1/21.);
        
	// partial f1 / partial c0
	//
	b2[1].real = (1./24.)*(1.-c1/15.*(1.-3.*c1/112.));
	b2[1].imag = -c0/2520.;
	
        
	// partial f1 / partial c1
	b1[1].real = -c0/360.*(1. - 3.*c1/56. );
	b1[1].imag = -1./6.*(1.-c1/10.*(1.-c1/28.));
        
	// partial f2/ partial c0
	b2[2].real = 0.5*c0/10080.;
	b2[2].imag = 0.5*(  1./60.*(1.-c1/21.*(1.-c1/48.)) );
        
	// partial f2/ partial c1
	b1[2].real = 0.5*(  1./12.*(1.-(2.*c1/30.)*(1.-3.*c1/112.)) ); 
	b1[2].imag = 0.5*( -c0/1260.*(1.-c1/24.) );
        
      } // do_bs
    }
  else 
    { 
      // =======================================================================
      // Normal case: Do as per Morningstar-Peardon paper
      // =======================================================================

      c0abs = fabs( c0 );
      c0max = 2*pow( c1/3., 1.5);
      
      // =======================================================================
      // Now work out theta. In the paper the case where c0 -> c0max even when c1 is reasonable 
      // Has never been considered, even though it can arise and can cause the arccos function
      // to fail
      // Here we handle it with series expansion
      // =======================================================================
      eps = (c0max - c0abs)/c0max;
      
      if( eps < 0 ) {
	// =====================================================================
	// Corner Case 2: Handle case when c0abs is bigger than c0max. 
	// This can happen only when there is a rounding error in the ratio, and that the 
	// ratio is really 1. This implies theta = 0 which we'll just set.
	// =====================================================================
	theta = 0;
      }
      else if ( eps < 1.0e-3 ) {
	// =====================================================================
	// Corner Case 3: c0->c0max even though c1 may be actually quite reasonable.
	// The ratio |c0|/c0max -> 1 but is still less than one, so that a 
	// series expansion is possible.
	// SERIES of acos(1-epsilon): Good to O(eps^6) or with this cutoff to O(10^{-18}) Computed with Maple.
	//  BTW: 1-epsilon = 1 - (c0max-c0abs)/c0max = 1-(1 - c0abs/c0max) = +c0abs/c0max
	//
	// ======================================================================
	theta = sqtwo*sqrt(eps)*( 1 + ( (1./12.) + ( (3./160.) + ( (5./896.) + ( (35./18432.) + (63./90112.)*eps ) *eps) *eps) *eps) *eps);
      } 
      else {  
	// 
	theta = acos( c0abs/c0max );
      }
          
      u = sqrt(c1/3.)*cos(theta/3.);
      w = sqrt(c1)*sin(theta/3.);
      
      u_sq = u*u;
      w_sq = w*w;

      if( fabs(w) < 0.05 ) { 
	xi0 = 1. - (1./6.)*w_sq*( 1. - (1./20.)*w_sq*( 1. - (1./42.)*w_sq ) );
      }
      else {
	xi0 = sin(w)/w;
      }
      
      if( do_bs) {
	
	if( fabs(w) < 0.05 ) { 
	  xi1 = -( 1./3. - (1./30.)*w_sq*( 1. - (1./28.)*w_sq*( 1. - (1./54.)*w_sq ) ) );
	}
	else { 
	  xi1 = cos(w)/w_sq - sin(w)/(w_sq*w);
	}
      }

      cosu = cos(u);
      sinu = sin(u);
      cosw = cos(w);
      sinw = sin(w);
      sin2u = sin(2*u);
      cos2u = cos(2*u);
      ucosu = u*cosu;
      usinu = u*sinu;
      ucos2u = u*cos2u;
      usin2u = u*sin2u;
      
      denom = 9.*u_sq - w_sq;

      {
	Real subexp1, subexp2, subexp3;

	subexp1 = u_sq - w_sq;
	subexp2 = 8*u_sq*cosw;
	subexp3 = (3*u_sq + w_sq)*xi0;
	
	f[0].real = ( (subexp1)*cos2u + cosu*subexp2 + 2*usinu*subexp3 ) / denom ;
	f[0].imag = ( (subexp1)*sin2u - sinu*subexp2 + 2*ucosu*subexp3 ) / denom ;

      }
      {
	Real subexp;
	
	subexp = (3*u_sq -w_sq)*xi0;
	
	f[1].real = (2*(ucos2u - ucosu*cosw)+subexp*sinu)/denom;
	f[1].imag = (2*(usin2u + usinu*cosw)+subexp*cosu)/denom;
      }
      {
	Real subexp;

	subexp=3*xi0;
      
	f[2].real = (cos2u - cosu*cosw -usinu*subexp) /denom ;
	f[2].imag = (sin2u + sinu*cosw -ucosu*subexp) /denom ;
      }

      if( do_bs )
	{
	  {
	      Real subexp1, subexp2, subexp3;
	      //          r_1[0]=Double(2)*cmplx(u, u_sq-w_sq)*exp2iu
	      //          + 2.0*expmiu*( cmplx(8.0*u*cosw, -4.0*u_sq*cosw)
	      //              + cmplx(u*(3.0*u_sq+w_sq),9.0*u_sq+w_sq)*xi0 );
	      
	      subexp1 = u_sq - w_sq;
	      subexp2 = 8.*cosw + (3.*u_sq + w_sq)*xi0 ;
	      subexp3 = 4.*u_sq*cosw - (9.*u_sq + w_sq)*xi0 ;
	      
	      r_1_re[0] = 2.*(ucos2u - sin2u *(subexp1)+ucosu*( subexp2 )- sinu*( subexp3 ) );
	      r_1_im[0] = 2.*(usin2u + cos2u *(subexp1)-usinu*( subexp2 )- cosu*( subexp3 ) );
	      
	  }
	  {
	      Real subexp1, subexp2;

	      // r_1[1]=cmplx(2.0, 4.0*u)*exp2iu + expmiu*cmplx(-2.0*cosw-(w_sq-3.0*u_sq)*xi0,2.0*u*cosw+6.0*u*xi0);
	      
	      subexp1 = cosw+3.*xi0;
	      subexp2 = 2.*cosw + xi0*(w_sq - 3.*u_sq);
	      
	      r_1_re[1] = 2.*((cos2u - 2.*usin2u) + usinu*subexp1) - cosu*subexp2;
	      r_1_im[1] = 2.*((sin2u + 2.*ucos2u) + ucosu*subexp1) + sinu*subexp2;
          }
	  {
	    Real subexp;
	    // r_1[2]=2.0*timesI(exp2iu)  +expmiu*cmplx(-3.0*u*xi0, cosw-3*xi0);
	    
	    subexp = cosw - 3.*xi0;
	    r_1_re[2] = -2.*sin2u -3.*ucosu*xi0 + sinu*subexp;
	    r_1_im[2] = 2.*cos2u  +3.*usinu*xi0 + cosu*subexp;
	  }
          
	  {
	    Real subexp;
	    //r_2[0]=-2.0*exp2iu + 2*cmplx(0,u)*expmiu*cmplx(cosw+xi0+3*u_sq*xi1,
	    //                                                 4*u*xi0);
	    
	    subexp = cosw + xi0 + 3.*u_sq*xi1;
	    r_2_re[0] = -2.*(cos2u + u*( 4.*ucosu*xi0 - sinu*subexp) );
	    r_2_im[0] = -2.*(sin2u - u*( 4.*usinu*xi0 + cosu*subexp) );
	  }
	  {
	    Real subexp;
          
	    // r_2[1]= expmiu*cmplx(cosw+xi0-3.0*u_sq*xi1, 2.0*u*xi0);
	    // r_2[1] = timesMinusI(r_2[1]);
	    
	    subexp =  cosw + xi0 - 3.*u_sq*xi1;
	    r_2_re[1] =  2.*ucosu*xi0 - sinu*subexp;
	    r_2_im[1] = -2.*usinu*xi0 - cosu*subexp;
	    
	  }
	  {
	    Real subexp;
	    //r_2[2]=expmiu*cmplx(xi0, -3.0*u*xi1);
	    
	    subexp = 3.*xi1;
            
	    r_2_re[2] =    cosu*xi0 - usinu*subexp ;
	    r_2_im[2] = -( sinu*xi0 + ucosu*subexp ) ;
	  }
          
	  b_denom=2.*denom*denom;
          
	  {
	    Real subexp1, subexp2, subexp3;
	    int j;

	    subexp1 = 2.*u;
	    subexp2 = 3.*u_sq - w_sq;
	    subexp3 = 2.*(15.*u_sq + w_sq);
	    
	    for(j=0; j < 3; j++) { 
	      
	      b1[j].real=( subexp1*r_1_re[j] + subexp2*r_2_re[j] - subexp3*f[j].real )/b_denom;
	      b1[j].imag=( subexp1*r_1_im[j] + subexp2*r_2_im[j] - subexp3*f[j].imag )/b_denom;
	    }
	  }
	  {
	    Real subexp1, subexp2;
	    int j;
	    
	    subexp1 = 3.*u;
	    subexp2 = 24.*u;
	    
	    for(j=0; j < 3; j++) { 
	      b2[j].real=( r_1_re[j] - subexp1*r_2_re[j] - subexp2 * f[j].real )/b_denom;
	      b2[j].imag=( r_1_im[j] - subexp1*r_2_im[j] - subexp2 * f[j].imag )/b_denom;
	    }
	  }
	  
	  // Now flip the coefficients of the b-s
	  if( c0 < 0 ) 
	    {
	      //b1_site[0] = conj(b1_site[0]);
	      b1[0].imag *= -1;
	      
	      //b1_site[1] = -conj(b1_site[1]);
	      b1[1].real *= -1;
	      
	      //b1_site[2] = conj(b1_site[2]);
	      b1[2].imag *= -1;
	      
	      //b2_site[0] = -conj(b2_site[0]);
	      b2[0].real *= -1;
	      
	      //b2_site[1] = conj(b2_site[1]);
	      b2[1].imag *= -1;
	      
	      //b2_site[2] = -conj(b2_site[2]);
	      b2[2].real *= -1;
	    }
	} // end of if (do_bs)
      
      // Now when everything is done flip signs of the b-s (can't do this before
      // as the unflipped f-s are needed to find the b-s
      
      if( c0 < 0 ) {
	
	// f[0] = conj(f[0]);
	f[0].imag *= -1;
        
	//f[1] = -conj(f[1]);
	f[1].real *= -1;
        
	//f[2] = conj(f[2]);
	f[2].imag *= -1;
        
      }
    } // End of if( corner_caseP ) else {}
}