RockPhysicsInversion4D::~RockPhysicsInversion4D()
{
 for(int i =0;i<4;i++)
   fftw_free(smoothingFilter_[i]);

 for(int i=0; i<2; i++){
    for (int j=0; j<nf_[0]; j++){
      delete meanRockPrediction_(i,j);
    }
  }

 fftwnd_destroy_plan(fftplan1_);
 fftwnd_destroy_plan(fftplan2_);
}
Example #2
0
int F77_FUNC_ (destroy_plan_3d, DESTROY_PLAN_3D)(fftwnd_plan *p)

{
   if ( *p != NULL ) fftwnd_destroy_plan(*p);
   else fprintf(stderr," *** DESTROY_PLAN_3D: warning empty plan ***\n");
   return 0;
}
//-----------------------------------------------------------------------
int fdct_wrapping_wavelet(CpxOffMat& Xhgh, vector<CpxNumMat>& csc)
{
  int N1 = Xhgh.m();  int N2 = Xhgh.n();
  int F1 = -Xhgh.s();  int F2 = -Xhgh.t();
  CpxNumMat T(N1, N2);
  fdct_wrapping_ifftshift(Xhgh, T);
    fftwnd_plan p ;
#pragma omp critical
  {
    p = fftw2d_create_plan(N2, N1, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
  }
  fftwnd_one(p, (fftw_complex*)T.data(), NULL);
  #pragma omp critical
  {
  fftwnd_destroy_plan(p);
  }
  double sqrtprod = sqrt(double(N1*N2));
  for(int j=0; j<N2; j++)
	 for(int i=0; i<N1; i++)
		T(i,j) /= sqrtprod;
  
  csc[0] = T;  //csc[0].resize(N1, N2);  //fdct_wrapping_fftshift(T, csc[0]);
  
  return 0;
}
Example #4
0
//------------------------------------------------------------------------------------
int fdct3d_inverse_center(int N1,int N2,int N3,int b, double L1,double L2,double L3, int s,
							  CpxCrvletPrtd& C, CpxNumTnsBlkd& W)
{
  int mpirank;  MPI_Comm_rank(MPI_COMM_WORLD, &mpirank);
  vector< vector<int> >& Cowners = C.owners();
  
  if(Cowners[0][0]==mpirank) {
	 int S1, S2, S3;	 int F1, F2, F3;	 double R1, R2, R3;	 fdct3d_rangecompute(L1, L2, L3, S1, S2, S3, F1, F2, F3, R1, R2, R3);
	 DblOffVec big1(S1);  fdct3d_lowpass(L1, big1);
	 DblOffVec big2(S2);  fdct3d_lowpass(L2, big2);
	 DblOffVec big3(S3);  fdct3d_lowpass(L3, big3);
	 
	 CpxNumTns T(S1,S2,S3);
	 CpxNumTns& Cblk = C.block(0,0); //center block
	 T = Cblk;
	 fftwnd_plan p = fftw3d_create_plan(S3,S2,S1, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
	 fftwnd_one(p, (fftw_complex*)T.data(), NULL);
	 fftwnd_destroy_plan(p);
	 double sqrtprod = sqrt(double(S1*S2*S3));
	 for(int i=0; i<S1; i++)	 for(int j=0; j<S2; j++)		for(int k=0; k<S3; k++)		  T(i,j,k) /= sqrtprod;
	 
	 CpxOffTns A(S1,S2,S3);
	 fdct3d_fftshift(S1,S2,S3,T,A);
	 for(int i=-S1/2; i<-S1/2+S1; i++)
		for(int j=-S2/2; j<-S2/2+S2; j++)
		  for(int k=-S3/2; k<-S3/2+S3; k++) {
			 int bi,bj,bk;			 int oi,oj,ok;			 fdct3d_position_aux(N1,N2,N3,b, i,j,k, bi,bj,bk,oi,oj,ok);
			 CpxNumTns& Wblk = W.block(bi,bj,bk);
			 Wblk(oi,oj,ok) += A(i,j,k) * (big1(i)*big2(j)*big3(k));
		  }
	 //done
  }
  return 0;
}
Example #5
0
void testnd_out_of_place(int rank, int *n, fftw_direction dir,
			 fftwnd_plan validated_plan)
{
     int istride, ostride;
     int N, dim, i;
     fftw_complex *in1, *in2, *out1, *out2;
     fftwnd_plan p;
     int flags = measure_flag | wisdom_flag;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     N = 1;
     for (dim = 0; dim < rank; ++dim)
	  N *= n[dim];

     in1 = (fftw_complex *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_complex));
     out1 = (fftw_complex *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_complex));
     in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     p = fftwnd_create_plan(rank, n, dir, flags);

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
	  /* generate random inputs */
	  for (i = 0; i < N; ++i) {
	       int j;
	       c_re(in2[i]) = DRAND();
	       c_im(in2[i]) = DRAND();
	       for (j = 0; j < istride; ++j) {
		    c_re(in1[i * istride + j]) = c_re(in2[i]);
		    c_im(in1[i * istride + j]) = c_im(in2[i]);
	       }
	  }

	  for (ostride = 1; ostride <= MAX_STRIDE; ++ostride) {
	       int howmany = (istride < ostride) ? istride : ostride;

	       if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
		    fftwnd_threads(nthreads, p, howmany, in1, istride, 1,
				   out1, ostride, 1);
	       else
		    fftwnd_threads_one(nthreads, p, in1, out1);

	       fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1);

	       for (i = 0; i < howmany; ++i)
		    CHECK(compute_error_complex(out1 + i, ostride, out2, 1, N)
			  < TOLERANCE,
			  "testnd_out_of_place: wrong answer");
	  }
     }

     fftwnd_destroy_plan(p);

     fftw_free(out2);
     fftw_free(in2);
     fftw_free(out1);
     fftw_free(in1);
}
Example #6
0
void destroy_maxwell_data(maxwell_data *d)
{
     if (d) {
	  int i;

	  for (i = 0; i < d->nplans; ++i) {
#if defined(HAVE_FFTW3)
	       FFTW(destroy_plan)((fftplan) (d->plans[i]));
	       FFTW(destroy_plan)((fftplan) (d->iplans[i]));
#elif defined(HAVE_FFTW)
#  ifdef HAVE_MPI
#    ifdef SCALAR_COMPLEX
	       fftwnd_mpi_destroy_plan((fftplan) (d->plans[i]));
	       fftwnd_mpi_destroy_plan((fftplan) (d->iplans[i]));
#    else /* not SCALAR_COMPLEX */
	       rfftwnd_mpi_destroy_plan((fftplan) (d->plans[i]));
	       rfftwnd_mpi_destroy_plan((fftplan) (d->iplans[i]));
#    endif /* not SCALAR_COMPLEX */
#  else /* not HAVE_MPI */
#    ifdef SCALAR_COMPLEX
	       fftwnd_destroy_plan((fftplan) (d->plans[i]));
	       fftwnd_destroy_plan((fftplan) (d->iplans[i]));
#    else /* not SCALAR_COMPLEX */
	       rfftwnd_destroy_plan((fftplan) (d->plans[i]));
	       rfftwnd_destroy_plan((fftplan) (d->iplans[i]));
#    endif /* not SCALAR_COMPLEX */
#  endif /* not HAVE_MPI */
#endif /* HAVE FFTW */
	  }

	  free(d->eps_inv);
#if defined(HAVE_FFTW3)
	  FFTW(free)(d->fft_data);
	  if (d->fft_data2 != d->fft_data)
	       FFTW(free)(d->fft_data2);
#else
	  free(d->fft_data);
#endif
	  free(d->k_plus_G);
	  free(d->k_plus_G_normsqr);

	  free(d);
     }
}
Example #7
0
void test_speed_nd_aux(struct size sz,
		       fftw_direction dir, int flags, int specific)
{
     fftw_complex *in;
     fftwnd_plan plan;
     double t;
     fftw_time begin, end;
     int i, N;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;	

     N = 1;
     for (i = 0; i < sz.rank; ++i)
	  N *= (sz.narray[i]);

     in = (fftw_complex *) fftw_malloc(N * howmany_fields *
				       sizeof(fftw_complex));

     if (specific) {
	  begin = fftw_get_time();
	  plan = fftwnd_create_plan_specific(sz.rank, sz.narray, dir,
					     speed_flag | flags
					     | wisdom_flag | no_vector_flag,
					     in, howmany_fields, 0, 1);
     } else {
	  begin = fftw_get_time();
	  plan = fftwnd_create_plan(sz.rank, sz.narray,
				    dir, speed_flag | flags 
				    | wisdom_flag | no_vector_flag);
     }
     end = fftw_get_time();
     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, printf("\n"));
     WHEN_VERBOSE(2, (fftwnd_print_plan(plan)));
     WHEN_VERBOSE(2, printf("\n"));

     FFTW_TIME_FFT(fftwnd(plan, howmany_fields,
			  in, howmany_fields, 1, 0, 0, 0),
		   in, N * howmany_fields, t);

     fftwnd_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5 (N log2 N) / (t in microseconds)"
			    " = %f\n", howmany_fields * mflops(t, N)));

     fftw_free(in);

     WHEN_VERBOSE(1, printf("\n"));
}
Example #8
0
/*
 * Class:     jfftw_complex_nd_Plan
 * Method:    destroyPlan
 * Signature: ()V
 */
JNIEXPORT void JNICALL Java_jfftw_complex_nd_Plan_destroyPlan( JNIEnv* env, jobject obj )
{
	jclass clazz = (*env)->GetObjectClass( env, obj );
	jfieldID id = (*env)->GetFieldID( env, clazz, "plan", "[B" );
	jbyteArray arr = (jbyteArray)(*env)->GetObjectField( env, obj, id );
	unsigned char* carr = (*env)->GetByteArrayElements( env, arr, 0 );

	fftwnd_destroy_plan( *(fftwnd_plan*)carr );

	(*env)->ReleaseByteArrayElements( env, arr, carr, 0 );
	(*env)->SetObjectField( env, obj, id, NULL );
}
Example #9
0
void testnd_correctness(struct size sz, fftw_direction dir,
			int alt_api, int specific, int force_buf)
{
     fftwnd_plan validated_plan;

     validated_plan = fftwnd_create_plan(sz.rank, sz.narray,
					 dir, measure_flag | wisdom_flag);

     testnd_out_of_place(sz.rank, sz.narray, dir, validated_plan);
     testnd_in_place(sz.rank, sz.narray, dir, validated_plan, alt_api, specific, force_buf);

     fftwnd_destroy_plan(validated_plan);
}
Example #10
0
void Wavelet::fft1DInPlace()
{
  // use the operator version of the fourier transform
  if(isReal_) {
    int flag;
    rfftwnd_plan plan;
    flag    = FFTW_ESTIMATE | FFTW_IN_PLACE;
    plan    = rfftwnd_create_plan(1, &nzp_ ,FFTW_REAL_TO_COMPLEX,flag);
    //
    // NBNB-PAL: The call rfftwnd_on_real_to_complex is causing UMRs in Purify.
    //
    rfftwnd_one_real_to_complex(plan,rAmp_,cAmp_);
    fftwnd_destroy_plan(plan);
    isReal_ = false;
  }
}
Example #11
0
void Wavelet::invFFT1DInPlace()
{
  // use the operator version of the fourier transform
  if(!isReal_) {
    int flag;
    rfftwnd_plan plan;

    flag = FFTW_ESTIMATE | FFTW_IN_PLACE;
    plan= rfftwnd_create_plan(1,&nzp_,FFTW_COMPLEX_TO_REAL,flag);
    rfftwnd_one_complex_to_real(plan,cAmp_,rAmp_);
    fftwnd_destroy_plan(plan);
    isReal_=true;
    double scale= static_cast<double>(1.0/static_cast<double>(nzp_));
    for(int i=0; i < nzp_; i++)
      rAmp_[i] = static_cast<fftw_real>(rAmp_[i]*scale);
  }
}
Example #12
0
void CMainFrame::Fft_back(Complex *array) {
	int m_iDem = m_iDem_ampl;
	fftw_complex *in = new fftw_complex[m_iDem * m_iDem];
	size_t dxdy = m_iDem * m_iDem;
	
	for(size_t i = 0; i < dxdy; i++) {
		in[i].re = array[i].re;
		in[i].im = array[i].im;
	}

	fftwnd_plan p = fftw2d_create_plan(m_iDem, m_iDem, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
	fftwnd_one(p, in, NULL);
	fftwnd_destroy_plan(p);
	
	for(size_t i = 0; i < dxdy; i++) {
		array[i].re = (float) in[i].re;
		array[i].im = (float) in[i].im;
	}
	delete[] in;
}
//---------------------
int fdct_wrapping_invwavelet(vector<CpxNumMat>& csc, CpxOffMat& Xhgh)
{
  assert(csc.size()==1);
  CpxNumMat& C = csc[0];
  int N1 = C.m();  int N2 = C.n();
  
  CpxNumMat T(C);  //CpxNumMat T(N1, N2);  fdct_wrapping_ifftshift(N1, N2, F1, F2, C, T);
  
  fftwnd_plan p = fftw2d_create_plan(N2, N1, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
  fftwnd_one(p, (fftw_complex*)T.data(), NULL);
  fftwnd_destroy_plan(p);
  double sqrtprod = sqrt(double(N1*N2));
  for(int j=0; j<N2; j++)
	 for(int i=0; i<N1; i++)
		T(i,j) /= sqrtprod;
  
  Xhgh.resize(N1, N2);
  fdct_wrapping_fftshift(T, Xhgh);
  return 0;
}
void testnd_correctness(struct size sz, fftw_direction dir,
			int alt_api, int specific, int force_buf)
{
     fftwnd_plan validated_plan;

     if (dir != FFTW_FORWARD)
	  return;
     if (force_buf)
	  return;

     validated_plan = fftwnd_create_plan(sz.rank, sz.narray, 
					 dir, measure_flag | wisdom_flag);
     CHECK(validated_plan != NULL, "can't create plan");

     testnd_out_of_place(sz.rank, sz.narray, validated_plan);
     testnd_in_place(sz.rank, sz.narray,
		     validated_plan, alt_api, specific);

     fftwnd_destroy_plan(validated_plan);
}
Example #15
0
static void iDoFFT(void *map, int width, int height, int inverse, int center, int normalize)
{
  if (inverse && center)
    iCenterFFT((im_complex*)map, width, height, inverse);

#ifdef USE_FFTW3
#if (IM_COMPLEX==IM_FLOAT)
  fftwf_plan plan = fftwf_plan_dft_2d(height, width, 
                      (fftwf_complex*)map, (fftwf_complex*)map, // in-place transform
                      inverse?FFTW_BACKWARD:FFTW_FORWARD, FFTW_ESTIMATE);
  fftwf_execute(plan);
  fftwf_destroy_plan(plan);
#else
  fftw_plan plan = fftw_plan_dft_2d(height, width, 
                     (fftw_complex*)map, (fftw_complex*)map, // in-place transform
                     inverse ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_ESTIMATE);
  fftw_execute(plan);
  fftw_destroy_plan(plan);
#endif
#else
  fftwnd_plan plan = fftw2d_create_plan(height, width, inverse?FFTW_BACKWARD:FFTW_FORWARD, FFTW_ESTIMATE|FFTW_IN_PLACE);
  fftwnd(plan, 1, (FFTW_COMPLEX*)map, 1, 0, 0, 0, 0);
  fftwnd_destroy_plan(plan);
#endif

  if (!inverse && center)
    iCenterFFT((im_complex*)map, width, height, inverse);

  if (normalize)
  {
    im_real NM = (im_real)(width * height);
    int count = (int)(2*NM);

    if (normalize == 1)
      NM = (im_real)sqrt(NM);

    im_real *fmap = (im_real*)map;
    for (int i = 0; i < count; i++)
      *fmap++ /= NM;
  }
}
Example #16
0
int main(int argc, char **argv) {
  
  int c, i, mu, nu;
  int count        = 0;
  int filename_set = 0;
  int dims[4]      = {0,0,0,0};
  int l_LX_at, l_LXstart_at;
  int x0, x1, x2, x3, ix, iix;
  int dxm[4], dxn[4], ixpm, ixpn;
  int sid;
  double *disc  = (double*)NULL;
  double *work = (double*)NULL;
  double q[4], fnorm;
  int verbose = 0;
  int do_gt   = 0;
  char filename[100];
  double ratime, retime;
  double plaq, _2kappamu, hpe3_coeff, onepmutilde2, mutilde2;
  double spinor1[24], spinor2[24], U_[18], U1_[18], U2_[18];
  double *gauge_trafo=(double*)NULL;
  complex w, w1, w2, *cp1, *cp2, *cp3;
  FILE *ofs;

  fftw_complex *in=(fftw_complex*)NULL;

#ifdef MPI
  fftwnd_mpi_plan plan_p, plan_m;
  int *status;
#else
  fftwnd_plan plan_p, plan_m;
#endif

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "h?vgf:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'g':
      do_gt = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  set_default_input_values();
  if(filename_set==0) strcpy(filename, "cvc.input");

  /* read the input file */
  read_input(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n");
    usage();
  }

  /* initialize MPI parameters */
  mpi_init(argc, argv);
#ifdef MPI
  if((status = (int*)calloc(g_nproc, sizeof(int))) == (int*)NULL) {
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
    exit(7);
  }
#endif

  /* initialize fftw */
  dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ;
#ifdef MPI
  plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE);
  plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE);
  fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME);
#else
  plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE);
  plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD,  FFTW_MEASURE | FFTW_IN_PLACE);
  T            = T_global;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
#endif
  fprintf(stdout, "# [%2d] fftw parameters:\n"\
                  "# [%2d] T            = %3d\n"\
		  "# [%2d] Tstart       = %3d\n"\
		  "# [%2d] l_LX_at      = %3d\n"\
		  "# [%2d] l_LXstart_at = %3d\n"\
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

#ifdef MPI
  if(T==0) {
    fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id);
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
    exit(2);
  }
#endif

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(1);
  }

  geometry();

  /* read the gauge field */
  alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND);
  sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf);
  if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename);
  read_lime_gauge_field_doubleprec(filename);
  xchange_gauge();

  /* measure the plaquette */
  plaquette(&plaq);
  if(g_cart_id==0) fprintf(stdout, "measured plaquette value: %25.16e\n", plaq);

  if(do_gt==1) {
    /***********************************
     * initialize gauge transformation
     ***********************************/
    init_gauge_trafo(&gauge_trafo, 1.);
    apply_gt_gauge(gauge_trafo);
    plaquette(&plaq);
    if(g_cart_id==0) fprintf(stdout, "measured plaquette value after gauge trafo: %25.16e\n", plaq);
  }

  /****************************************
   * allocate memory for the spinor fields
   ****************************************/
  no_fields = 3;
  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND);

  /****************************************
   * allocate memory for the contractions
   ****************************************/
  disc  = (double*)calloc( 8*VOLUME, sizeof(double));
  if( disc == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for disc\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }
  for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.;

  work  = (double*)calloc(48*VOLUME, sizeof(double));
  if( work == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for work\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }

  /****************************************
   * prepare Fourier transformation arrays
   ****************************************/
  in  = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex));
  if(in==(fftw_complex*)NULL) {    
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(4);
  }

  /***********************************************
   * start loop on source id.s 
   ***********************************************/
  for(sid=g_sourceid; sid<=g_sourceid2; sid++) {

    /********************************
     * read the first propagator
     ********************************/
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(format==0) {
      sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid);
      if(read_lime_spinor(g_spinor_field[2], filename, 0) != 0) break;
    }
    else if(format==1) {
      sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid);
      if(read_cmi(g_spinor_field[2], filename) != 0) break;
    }
    xchange_field(g_spinor_field[2]);
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    fprintf(stdout, "time to read prop.: %e seconds\n", retime-ratime);

    if(do_gt==1) {
      /******************************************
       * gauge transform the propagators for sid
       ******************************************/
      for(ix=0; ix<VOLUME; ix++) {
        _fv_eq_cm_ti_fv(spinor1, gauge_trafo+18*ix, g_spinor_field[2]+_GSI(ix));
        _fv_eq_fv(g_spinor_field[2]+_GSI(ix), spinor1);
      }
      xchange_field(g_spinor_field[2]);
    }

    /************************************************
     * calculate the source: apply Q_phi_tbc 
     ************************************************/
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    Q_phi_tbc(g_spinor_field[0], g_spinor_field[2]);
    xchange_field(g_spinor_field[0]); 
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "time to calculate source: %e seconds\n", retime-ratime);


    /************************************************
     * HPE: apply BH5 
     ************************************************/
    BH5(g_spinor_field[1], g_spinor_field[2]);

    for(ix=0; ix<8*VOLUME; ix++) {disc[ix] = 0.;}

    /* add new contractions to (existing) disc */
#  ifdef MPI
    ratime = MPI_Wtime();
#  else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#  endif
    for(mu=0; mu<4; mu++) { /* loop on Lorentz index of the current */
      iix = _GWI(mu,0,VOLUME);
      for(ix=0; ix<VOLUME; ix++) {    /* loop on lattice sites */
        _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]);

        /* first contribution */
        _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_mi_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2);
	disc[iix  ] -= 0.5 * w.re;
	disc[iix+1] -= 0.5 * w.im;

        /* second contribution */
	_fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_pl_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2);
	disc[iix  ] -= 0.5 * w.re;
	disc[iix+1] -= 0.5 * w.im;

	iix += 2;
      }  /* of ix */
    }    /* of mu */

#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "[%2d] time to contract cvc: %e seconds\n", g_cart_id, retime-ratime);

#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif

    /* Fourier transform data, copy to work */
    for(mu=0; mu<4; mu++) {
      memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_p, in, NULL);
#endif
      memcpy((void*)(work+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));
    }

    /********************************
     * read the second propagator
     ********************************/
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(format==0) {
      sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid+g_resume);
      if(read_lime_spinor(g_spinor_field[2], filename, 0) != 0) break;
    }
    else if(format==1) {
      sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid+g_resume);
      if(read_cmi(g_spinor_field[2], filename) != 0) break;
    }
    xchange_field(g_spinor_field[2]);
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    fprintf(stdout, "time to read prop.: %e seconds\n", retime-ratime);

    if(do_gt==1) {
      /******************************************
       * gauge transform the propagators for sid
       ******************************************/
      for(ix=0; ix<VOLUME; ix++) {
        _fv_eq_cm_ti_fv(spinor1, gauge_trafo+18*ix, g_spinor_field[2]+_GSI(ix));
        _fv_eq_fv(g_spinor_field[2]+_GSI(ix), spinor1);
      }
      xchange_field(g_spinor_field[2]);
    }

    /************************************************
     * calculate the source: apply Q_phi_tbc 
     ************************************************/
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    Q_phi_tbc(g_spinor_field[0], g_spinor_field[2]);
    xchange_field(g_spinor_field[0]); 
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "time to calculate source: %e seconds\n", retime-ratime);


    /************************************************
     * HPE: apply BH5 
     ************************************************/
    BH5(g_spinor_field[1], g_spinor_field[2]);

    for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.;

    /* add new contractions to (existing) disc */
#  ifdef MPI
    ratime = MPI_Wtime();
#  else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#  endif
    for(mu=0; mu<4; mu++) { /* loop on Lorentz index of the current */
      iix = _GWI(mu,0,VOLUME);
      for(ix=0; ix<VOLUME; ix++) {    /* loop on lattice sites */
        _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]);

        /* first contribution */
        _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_mi_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2);
	disc[iix  ] -= 0.5 * w.re;
	disc[iix+1] -= 0.5 * w.im;

        /* second contribution */
	_fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_pl_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2);
	disc[iix  ] -= 0.5 * w.re;
	disc[iix+1] -= 0.5 * w.im;

	iix += 2;
      }  /* of ix */
    }    /* of mu */

#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "[%2d] time to contract cvc: %e seconds\n", g_cart_id, retime-ratime);

#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif

    /* Fourier transform data, copy to work */
    for(mu=0; mu<4; mu++) {
      memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_m, in, NULL);
#endif
      memcpy((void*)(work+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));
    }

    fnorm = 1. / ((double)(T_global*LX*LY*LZ));
    fprintf(stdout, "fnorm = %e\n", fnorm);
    for(mu=0; mu<4; mu++) {
    for(nu=0; nu<4; nu++) {
      cp1 = (complex*)(work+_GWI(mu,0,VOLUME));
      cp2 = (complex*)(work+_GWI(4+nu,0,VOLUME));
      cp3 = (complex*)(work+_GWI(8+4*mu+nu,0,VOLUME));
     
      for(x0=0; x0<T; x0++) {
        q[0] = (double)(x0+Tstart) / (double)T_global;
      for(x1=0; x1<LX; x1++) {
        q[1] = (double)(x1) / (double)LX;
      for(x2=0; x2<LY; x2++) {
        q[2] = (double)(x2) / (double)LY;
      for(x3=0; x3<LZ; x3++) {
        q[3] = (double)(x3) / (double)LZ;
        ix = g_ipt[x0][x1][x2][x3];
        w.re = cos( M_PI * (q[mu]-q[nu]) );
	w.im = sin( M_PI * (q[mu]-q[nu]) );
	_co_eq_co_ti_co(&w1, cp1, cp2);
	_co_eq_co_ti_co(cp3, &w1, &w);
	_co_ti_eq_re(cp3, fnorm);
	cp1++; cp2++; cp3++;
      }
      }
      }
      }

    }
    }
  
    /* save the result in momentum space */
    sprintf(filename, "cvc_hpe5_ft.%.4d.%.2d", Nconf, sid);
    write_contraction(work+_GWI(8,0,VOLUME), NULL, filename, 16, 0, 0);

#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "time to save cvc results: %e seconds\n", retime-ratime);

  }  /* of loop on sid */

  /***********************************************
   * free the allocated memory, finalize 
   ***********************************************/
  free(g_gauge_field);
  for(i=0; i<no_fields; i++) free(g_spinor_field[i]);
  free(g_spinor_field);
  free_geometry();
  fftw_free(in);
  free(disc);

  free(work);

#ifdef MPI
  fftwnd_mpi_destroy_plan(plan_p);
  fftwnd_mpi_destroy_plan(plan_m);
  free(status);
  MPI_Finalize();
#else
  fftwnd_destroy_plan(plan_p);
  fftwnd_destroy_plan(plan_m);
#endif

  return(0);

}
Example #17
0
File: fftwnd.c Project: Pinkii-/PCA
/*
 * Create an fftwnd_plan specialized for specific arrays.  (These
 * arrays are ignored, however, if they are NULL or if the flags do
 * not include FFTW_MEASURE.)  The main advantage of being provided
 * arrays like this is that we can do runtime timing measurements of
 * our options, without worrying about allocating excessive scratch
 * space.
 */
fftwnd_plan fftwnd_create_plan_specific(int rank, const int *n,
					fftw_direction dir, int flags,
					fftw_complex *in, int istride,
					fftw_complex *out, int ostride)
{
     fftwnd_plan p;

     if (!(p = fftwnd_create_plan_aux(rank, n, dir, flags)))
	  return 0;

     if (!(flags & FFTW_MEASURE) || in == 0
	 || (!p->is_in_place && out == 0)) {

/**** use default plan ****/

	  p->plans = fftwnd_create_plans_generic(fftwnd_new_plan_array(rank),
						 rank, n, dir, flags);
	  if (!p->plans) {
	       fftwnd_destroy_plan(p);
	       return 0;
	  }
	  if (flags & FFTWND_FORCE_BUFFERED)
	       p->nbuffers = FFTWND_NBUFFERS;
	  else
	       p->nbuffers = FFTWND_DEFAULT_NBUFFERS;

	  p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1);
	  if (p->nwork && !(flags & FFTW_THREADSAFE)) {
	       p->work = (fftw_complex*) fftw_malloc(p->nwork 
						     * sizeof(fftw_complex));
	       if (!p->work) {
		    fftwnd_destroy_plan(p);
		    return 0;
	       }
	  }
     } else {
/**** use runtime measurements to pick plan ****/

	  fftw_plan *plans_buf, *plans_nobuf;
	  double t_buf, t_nobuf;

	  p->nwork = fftwnd_work_size(rank, n, flags, FFTWND_NBUFFERS + 1);
	  if (p->nwork && !(flags & FFTW_THREADSAFE)) {
	       p->work = (fftw_complex*) fftw_malloc(p->nwork 
						     * sizeof(fftw_complex));
	       if (!p->work) {
		    fftwnd_destroy_plan(p);
		    return 0;
	       }
	  }
	  else
	       p->work = (fftw_complex*) NULL;

	  /* two possible sets of 1D plans: */
	  plans_buf = fftwnd_create_plans_generic(fftwnd_new_plan_array(rank),
						  rank, n, dir, flags);
	  plans_nobuf = 
	       fftwnd_create_plans_specific(fftwnd_new_plan_array(rank),
					    rank, n, p->n_after, dir,
					    flags, in, istride,
					    out, ostride);
	  if (!plans_buf || !plans_nobuf) {
	       destroy_plan_array(rank, plans_nobuf);
	       destroy_plan_array(rank, plans_buf);
	       fftwnd_destroy_plan(p);
	       return 0;
	  }
	  /* time the two possible plans */
	  p->plans = plans_nobuf;
	  p->nbuffers = 0;
	  p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1);
	  t_nobuf = fftwnd_measure_runtime(p, in, istride, out, ostride);
	  p->plans = plans_buf;
	  p->nbuffers = FFTWND_NBUFFERS;
	  p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1);
	  t_buf = fftwnd_measure_runtime(p, in, istride, out, ostride);

	  /* pick the better one: */
	  if (t_nobuf < t_buf) {	/* use unbuffered transform */
	       p->plans = plans_nobuf;
	       p->nbuffers = 0;

	       /* work array is unnecessarily large */
	       if (p->work)
		    fftw_free(p->work);
	       p->work = 0;

	       destroy_plan_array(rank, plans_buf);

	       /* allocate a work array of the correct size: */
	       p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1);
	       if (p->nwork && !(flags & FFTW_THREADSAFE)) {
		    p->work = (fftw_complex*) fftw_malloc(p->nwork 
						       * sizeof(fftw_complex));
		    if (!p->work) {
			 fftwnd_destroy_plan(p);
			 return 0;
		    }
	       }
	  } else {		/* use buffered transform */
	       destroy_plan_array(rank, plans_nobuf);
	  }
     }

     return p;
}
void F77_FUNC_(fftwnd_f77_destroy_plan,FFTWND_F77_DESTROY_PLAN)
(fftwnd_plan *p)
{
     fftwnd_destroy_plan(*p);
}
Example #19
0
//-----------------------------------------------------------------------
int fdct_wrapping_sepangle(double XL1, double XL2, int nbangle, CpxOffMat& Xhgh, vector<CpxNumMat>& csc)
{
  //WEDGE ORDERING: from -45 degree, counter-clockwise
  typedef pair<int,int> intpair;
  map<intpair, fftwnd_plan> planmap;
  
  int nbquadrants = 4;
  int nd = nbangle / 4;
  int wcnt = 0;
  
  //backup
  CpxOffMat Xhghb(Xhgh);
  double XL1b = XL1;  double XL2b = XL2;
  
  int qvec[] = {2,1,0,3};
  for(int qi=0; qi<nbquadrants; qi++) {
	 int q = qvec[qi];
	 //ROTATE data to its right position
	 fdct_wrapping_rotate_forward(q, XL1b, XL2b, XL1, XL2);	 XL1 = abs(XL1);	 XL2 = abs(XL2);
	 fdct_wrapping_rotate_forward(q, Xhghb, Xhgh);
	 //figure out XS, XF, XR
	 double XW1 = XL1/nd;	 double XW2 = XL2/nd;
	 int XS1, XS2;  int XF1, XF2;  double XR1, XR2;  fdct_wrapping_rangecompute(XL1, XL2, XS1, XS2, XF1, XF2, XR1, XR2);
	 for(int w=nd-1; w>=0; w--) {
		double xs = XR1/4 - (XW1/2)/4;
		double xe = XR1;
		double ys = -XR2 + (w-0.5)*XW2;
		double ye = -XR2 + (w+1.5)*XW2; //x range
		int xn = int(ceil(xe-xs));			 int yn = int(ceil(ye-ys));
		//MAKE THEM ODD
		if(xn%2==0) xn++;		if(yn%2==0) yn++;
		int xf = int(ceil(xs));			 //int yf = int(ceil(ys));
		//theta
		double thts, thtm, thte; //y direction
		if(w==0) {
		  thts = atan2(-1.0, 1.0-1.0/nd);
		  thtm = atan2(-1.0+1.0/nd, 1.0);
		  thte = atan2(-1.0+3.0/nd, 1.0);
		} else if(w==nd-1) {
		  thts = atan2(-1.0+(2.0*w-1.0)/nd, 1.0);
		  thtm = atan2(-1.0+(2.0*w+1.0)/nd, 1.0);
		  thte = atan2(1.0, 1.0-1.0/nd);
		} else {
		  thts = atan2(-1.0+(2.0*w-1.0)/nd, 1.0);
		  thtm = atan2(-1.0+(2.0*w+1.0)/nd, 1.0);
		  thte = atan2(-1.0+(2.0*w+3.0)/nd, 1.0);
		}
		//wrapping
		int xh = xn/2;		int yh = yn/2; //half length
		double R21 = XR2/XR1; //ratio
		CpxOffMat wpdata(xn,yn);
		for(int xcur=xf; xcur<xe; xcur++) { //for each layer
		  int yfm = (int)ceil( max(-XR2, R21*xcur*tan(thts)) );
		  int yto = (int)floor( min(XR2, R21*xcur*tan(thte)) );
		  for(int ycur=yfm; ycur<=yto; ycur++) {
			 int tmpx = xcur%xn;				  if(tmpx<-xh) tmpx+=xn;				  if(tmpx>=-xh+xn) tmpx-=xn;
			 int tmpy = ycur%yn;				  if(tmpy<-yh) tmpy+=yn;				  if(tmpy>=-yh+yn) tmpy-=yn;
			 wpdata(tmpx,tmpy) = Xhgh(xcur,ycur);
			 //partition of unity
			 double thtcur = atan2(ycur/XR2, xcur/XR1);
			 double wtht;
			 if(thtcur<thtm) {
				double l,r; fdct_wrapping_window((thtcur-thts)/(thtm-thts), l, r);
				wtht = l;
			 } else {
				double l,r; fdct_wrapping_window((thtcur-thtm)/(thte-thtm), l, r);
				wtht = r;
			 }
			 double pou = wtht;
			 wpdata(tmpx,tmpy) *= pou;
		  }
		}
		//IFFT
		{
		  //rotate backward
		  CpxOffMat rpdata;
		  fdct_wrapping_rotate_backward(q, wpdata, rpdata);
		  //ifftshift
		  int xn = rpdata.m();		int yn = rpdata.n(); //reset xn, yn
		  CpxNumMat tpdata(xn,yn);
		  fdct_wrapping_ifftshift(rpdata, tpdata);
		  //ifft
		  fftwnd_plan p = NULL;
		  map<intpair,fftwnd_plan>::iterator mit=planmap.find( intpair(xn,yn) );
		  if(mit!=planmap.end()) {
			 p = (*mit).second;
		  } else {
			 p = fftw2d_create_plan(yn, xn, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
			 planmap[ intpair(xn, yn) ] = p;
		  }
		  fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL);
		  double sqrtprod = sqrt(double(xn*yn));
		  for(int j=0; j<yn; j++)		  for(int i=0; i<xn; i++)			 tpdata(i,j) /= sqrtprod;
		  //store
		  csc[wcnt] = tpdata;
		}
		
		//fdct_wrapping_fftshift(xn,yn,xh,yh,tpdata,wpdata);
		//ROTATION
		//fdct_wrapping_rotate_backward(q, wpdata, csc[wcnt]);
		
		wcnt++;
	 } //end of w loop
  } //end of q loop
  //PUT THE RIGHT DATA BACK
  Xhgh = Xhghb;
  XL1 = XL1b;  XL2 = XL2b;
  
  for(map<intpair, fftwnd_plan>::iterator mit=planmap.begin(); mit!=planmap.end(); mit++) {
	 fftwnd_plan p = (*mit).second;
	 fftwnd_destroy_plan(p);
  }
  return 0;
}
Example #20
0
int main(int argc, char **argv) {
  
  int c, i, mu, nu;
  int count        = 0;
  int filename_set = 0;
  int dims[4]      = {0,0,0,0};
  int l_LX_at, l_LXstart_at;
  int x0, x1, x2, x3, ix, iix;
  int dxm[4], dxn[4], ixpm, ixpn;
  int sid;
  double *disc  = (double*)NULL;
  double *disc2 = (double*)NULL;
  double *work = (double*)NULL;
  double q[4], fnorm;
  int verbose = 0;
  int do_gt   = 0;
  char filename[100], contype[200];
  double ratime, retime;
  double plaq, _2kappamu, hpe3_coeff, onepmutilde2, mutilde2;
  double spinor1[24], spinor2[24], U_[18], U1_[18], U2_[18];
  double *gauge_trafo=(double*)NULL;
  complex w, w1, w2, *cp1, *cp2, *cp3;
  FILE *ofs;

  fftw_complex *in=(fftw_complex*)NULL;

#ifdef MPI
  fftwnd_mpi_plan plan_p, plan_m;
#else
  fftwnd_plan plan_p, plan_m;
#endif

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "h?vgf:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'g':
      do_gt = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n");
    usage();
  }

  /* initialize MPI parameters */
  mpi_init(argc, argv);

  /* initialize fftw */
  dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ;
#ifdef MPI
  plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE);
  plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE);
  fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME);
#else
  plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE);
  plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD,  FFTW_MEASURE | FFTW_IN_PLACE);
  T            = T_global;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
#endif
  fprintf(stdout, "# [%2d] fftw parameters:\n"\
                  "# [%2d] T            = %3d\n"\
		  "# [%2d] Tstart       = %3d\n"\
		  "# [%2d] l_LX_at      = %3d\n"\
		  "# [%2d] l_LXstart_at = %3d\n"\
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

#ifdef MPI
  if(T==0) {
    fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id);
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
    exit(2);
  }
#endif

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(1);
  }

  geometry();

  /* read the gauge field */
  alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND);
  sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf);
  if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename);
  read_lime_gauge_field_doubleprec(filename);
#ifdef MPI
  xchange_gauge();
#endif

  /* measure the plaquette */
  plaquette(&plaq);
  if(g_cart_id==0) fprintf(stdout, "measured plaquette value: %25.16e\n", plaq);

  if(do_gt==1) {
    /***********************************
     * initialize gauge transformation
     ***********************************/
    init_gauge_trafo(&gauge_trafo, 1.);
    apply_gt_gauge(gauge_trafo);
    plaquette(&plaq);
    if(g_cart_id==0) fprintf(stdout, "measured plaquette value after gauge trafo: %25.16e\n", plaq);
  }

  /****************************************
   * allocate memory for the spinor fields
   ****************************************/
  no_fields = 3;
  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND);

  /****************************************
   * allocate memory for the contractions
   ****************************************/
  disc  = (double*)calloc( 8*VOLUME, sizeof(double));
  if( disc == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for disc\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }
  for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.;

  disc2 = (double*)calloc( 8*VOLUME, sizeof(double));
  if( disc2 == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for disc2\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }
  for(ix=0; ix<8*VOLUME; ix++) disc2[ix] = 0.;

  work  = (double*)calloc(48*VOLUME, sizeof(double));
  if( work == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for work\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }

  /****************************************
   * prepare Fourier transformation arrays
   ****************************************/
  in  = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex));
  if(in==(fftw_complex*)NULL) {    
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(4);
  }

  /************************************************
   * HPE: calculate coeff. of 3rd order term
   ************************************************/
  _2kappamu    = 2. * g_kappa * g_mu;
  onepmutilde2 = 1. + _2kappamu * _2kappamu;
  mutilde2     = _2kappamu * _2kappamu;

  hpe3_coeff   = 16. * g_kappa*g_kappa*g_kappa*g_kappa * (1. + 6. * mutilde2 + mutilde2*mutilde2) / onepmutilde2 / onepmutilde2 / onepmutilde2 / onepmutilde2;

/*
  hpe3_coeff = 8. * g_kappa*g_kappa*g_kappa * \
        (1. + 6.*_2kappamu*_2kappamu + _2kappamu*_2kappamu*_2kappamu*_2kappamu) / (1. + _2kappamu*_2kappamu) / (1. + _2kappamu*_2kappamu) / (1. + _2kappamu*_2kappamu) / (1. + _2kappamu*_2kappamu);
*/
  fprintf(stdout, "hpe3_coeff = %25.16e\n", hpe3_coeff);

  /************************************************
   * HPE: calculate the plaquette terms 
   ************************************************/

  for(ix=0; ix<VOLUME; ix++) {
    for(mu=0; mu<4; mu++) { 
      for(i=1; i<4; i++) {
        nu = (mu+i)%4;
        _cm_eq_cm_ti_cm(U1_, g_gauge_field+_GGI(ix,mu), g_gauge_field+_GGI(g_iup[ix][mu],nu) );
        _cm_eq_cm_ti_cm(U2_, g_gauge_field+_GGI(ix,nu), g_gauge_field+_GGI(g_iup[ix][nu],mu) );
        _cm_eq_cm_ti_cm_dag(U_, U1_, U2_);
        _co_eq_tr_cm(&w1, U_);

        iix = g_idn[ix][nu];
        _cm_eq_cm_ti_cm(U1_, g_gauge_field+_GGI(iix,mu), g_gauge_field+_GGI(g_iup[iix][mu],nu) );
        _cm_eq_cm_ti_cm(U2_, g_gauge_field+_GGI(iix,nu), g_gauge_field+_GGI(g_iup[iix][nu],mu) );
        _cm_eq_cm_ti_cm_dag(U_, U1_, U2_);
        _co_eq_tr_cm(&w2, U_);
        disc2[_GWI(mu,ix,VOLUME)+1] += hpe3_coeff * (w1.im - w2.im);

/*
        _cm_eq_cm_ti_cm(U1_, g_gauge_field+_GGI(g_idn[ix][nu],nu), g_gauge_field+_GGI(ix,mu) );
        _cm_eq_cm_ti_cm(U2_, g_gauge_field+_GGI(g_idn[ix][nu],mu), g_gauge_field+_GGI(g_iup[g_idn[ix][nu]][mu], nu) );
        _cm_eq_cm_ti_cm_dag(U_, U1_, U2_);
        _co_eq_tr_cm(&w2, U_);
        disc2[_GWI(mu,ix,VOLUME)+1] += hpe3_coeff * (w1.im + w2.im);
*/


/*        fprintf(stdout, "mu=%1d, ix=%5d, nu=%1d, w1=%25.16e +i %25.16e; w2=%25.16e +i %25.16e\n", 
            mu, ix, nu, w1.re, w1.im, w2.re, w2.im); */
      }  /* of nu */

      /****************************************
       * - in case lattice size equals 4 
       *   calculate additional loop term
       * - _NOTE_ the possible minus sign from
       *   the fermionic boundary conditions
       ****************************************/
      if(dims[mu]==4) {
        wilson_loop(&w, ix, mu, dims[mu]);
        fnorm = -64. * g_kappa*g_kappa*g_kappa*g_kappa / onepmutilde2 / onepmutilde2 / onepmutilde2 / onepmutilde2; 
        disc2[_GWI(mu,ix,VOLUME)+1] += fnorm * w.im;
/*        fprintf(stdout, "loop contribution: ix=%5d, mu=%2d, fnorm=%25.16e, w=%25.16e\n", ix, mu, fnorm, w.im); */
      }
/*
      fprintf(stdout, "-------------------------------------------\n");
      fprintf(stdout, "disc2[ix=%d,mu=%d] = %25.16e +i %25.16e\n", ix, mu, disc2[_GWI(mu,ix,VOLUME)], disc2[_GWI(mu,ix,VOLUME)+1]);
      fprintf(stdout, "-------------------------------------------\n");
*/
    }
  }
/*
  sprintf(filename, "avc_disc_hpe5_3rd.%.4d", Nconf);
  ofs = fopen(filename, "w");
  for(ix=0; ix<VOLUME; ix++) {
    for(mu=0; mu<4; mu++) { 
      fprintf(ofs, "%6d%3d%25.16e\t%25.16e\n", ix, mu, disc[_GWI(mu,ix,VOLUME)], disc[_GWI(mu,ix,VOLUME)+1]);
    }
  }
  fclose(ofs);
  for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.;
*/
/*
  for(x0=0; x0<T; x0++) {
  for(x1=0; x1<LX; x1++) {
  for(x2=0; x2<LY; x2++) {
  for(x3=0; x3<LZ; x3++) {
    ix = g_ipt[x0][x1][x2][x3];
    for(mu=0; mu<4; mu++) {
      dxm[0]=0; dxm[1]=0; dxm[2]=0; dxm[3]=0; dxm[mu]=1;

      for(i=1; i<4; i++) {
        nu = (mu+i)%4;
        dxn[0]=0; dxn[1]=0; dxn[2]=0; dxn[3]=0; dxn[nu]=1;

        ixpm = g_ipt[(x0+dxm[0]+T)%T][(x1+dxm[1]+LX)%LX][(x2+dxm[2]+LY)%LY][(x3+dxm[3]+LZ)%LZ];
        ixpn = g_ipt[(x0+dxn[0]+T)%T][(x1+dxn[1]+LX)%LX][(x2+dxn[2]+LY)%LY][(x3+dxn[3]+LZ)%LZ];

        _cm_eq_cm_ti_cm(U1_, g_gauge_field + 72*ix+18*mu, g_gauge_field + 72*ixpm+18*nu );
        _cm_eq_cm_ti_cm(U2_, g_gauge_field + 72*ix+18*nu, g_gauge_field + 72*ixpn+18*mu );
        _cm_eq_cm_ti_cm_dag(U_, U1_, U2_);
        _co_eq_tr_cm(&w1, U_);

        ixpm = g_ipt[(x0+dxm[0]-dxn[0]+T)%T][(x1+dxm[1]-dxn[1]+LX)%LX][(x2+dxm[2]-dxn[2]+LY)%LY][(x3+dxm[3]-dxn[3]+LZ)%LZ];
        ixpn = g_ipt[(x0-dxn[0]+T)%T][(x1-dxn[1]+LX)%LX][(x2-dxn[2]+LY)%LY][(x3-dxn[3]+LZ)%LZ];

        _cm_eq_cm_ti_cm(U1_, g_gauge_field + 72*ixpn+18*nu, g_gauge_field + 72*ix+18*mu);
        _cm_eq_cm_ti_cm(U2_, g_gauge_field + 72*ixpn+18*mu, g_gauge_field + 72*ixpm+18*nu);
        _cm_eq_cm_ti_cm_dag(U_, U1_, U2_);
        _co_eq_tr_cm(&w2, U_);

        disc2[_GWI(mu,ix,VOLUME)+1] += hpe3_coeff * (w1.im + w2.im);
        fprintf(stdout, "mu=%1d, ix=%5d, nu=%1d, w1=%25.16e; w2=%25.16e\n", mu, ix, nu, w1.im, w2.im); 
      }
      fprintf(stdout, "-------------------------------------------\n");
      fprintf(stdout, "disc2[ix=%d,mu=%d] = %25.16e +i %25.16e\n", ix, mu, disc2[_GWI(mu,ix,VOLUME)], disc2[_GWI(mu,ix,VOLUME)+1]);
      fprintf(stdout, "-------------------------------------------\n");
    }
  }
  }
  }
  }
*/

  /***********************************************
   * start loop on source id.s 
   ***********************************************/
  for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) {

    /* read the new propagator */
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(format==0) {
      sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid);
      if(read_lime_spinor(g_spinor_field[2], filename, 0) != 0) break;
    }
    else if(format==1) {
      sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid);
      if(read_cmi(g_spinor_field[2], filename) != 0) break;
    }
    xchange_field(g_spinor_field[2]);
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    fprintf(stdout, "time to read prop.: %e seconds\n", retime-ratime);

    if(do_gt==1) {
      /******************************************
       * gauge transform the propagators for sid
       ******************************************/
      for(ix=0; ix<VOLUME; ix++) {
        _fv_eq_cm_ti_fv(spinor1, gauge_trafo+18*ix, g_spinor_field[2]+_GSI(ix));
        _fv_eq_fv(g_spinor_field[2]+_GSI(ix), spinor1);
      }
      xchange_field(g_spinor_field[2]);
    }

    count++;

    /************************************************
     * calculate the source: apply Q_phi_tbc 
     ************************************************/
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    Q_phi_tbc(g_spinor_field[0], g_spinor_field[2]);
    xchange_field(g_spinor_field[0]); 
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "time to calculate source: %e seconds\n", retime-ratime);


    /************************************************
     * HPE: apply BH5 
     ************************************************/
    BH5(g_spinor_field[1], g_spinor_field[2]);

    /* add new contractions to (existing) disc */
#  ifdef MPI
    ratime = MPI_Wtime();
#  else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#  endif
    for(mu=0; mu<4; mu++) { /* loop on Lorentz index of the current */
      iix = _GWI(mu,0,VOLUME);
      for(ix=0; ix<VOLUME; ix++) {    /* loop on lattice sites */
        _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]);

        /* first contribution */
        _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_mi_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2);
	disc[iix  ] -= 0.5 * w.re;
	disc[iix+1] -= 0.5 * w.im;

        /* second contribution */
	_fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_pl_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2);
	disc[iix  ] -= 0.5 * w.re;
	disc[iix+1] -= 0.5 * w.im;

	iix += 2;
      }  /* of ix */
    }    /* of mu */

#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "# time to contract cvc: %e seconds\n", retime-ratime);


    /************************************************
     * save results for count = multiple of Nsave 
     ************************************************/
    if(count%Nsave == 0) {

      if(g_cart_id == 0) fprintf(stdout, "save results for count = %d\n", count);

      fnorm = 1. / ( (double)count * g_prop_normsqr );
      if(g_cart_id==0) fprintf(stdout, "# X-fnorm = %e\n", fnorm);
      for(mu=0; mu<4; mu++) {
        for(ix=0; ix<VOLUME; ix++) {
          work[_GWI(mu,ix,VOLUME)  ] = disc[_GWI(mu,ix,VOLUME)  ] * fnorm + disc2[_GWI(mu,ix,VOLUME)  ];
          work[_GWI(mu,ix,VOLUME)+1] = disc[_GWI(mu,ix,VOLUME)+1] * fnorm + disc2[_GWI(mu,ix,VOLUME)+1];
        }
      }

      /* save the result in position space */
      sprintf(filename, "cvc_hpe5_X.%.4d.%.4d", Nconf, count);
      sprintf(contype, "cvc-disc-all-hpe-05-X");
      write_lime_contraction(work, filename, 64, 4, contype, Nconf, count);

/*
      sprintf(filename, "cvc_hpe5_Xascii.%.4d.%.4d", Nconf, count);
      write_contraction(work, NULL, filename, 4, 2, 0);
*/

#ifdef MPI
      ratime = MPI_Wtime();
#else
      ratime = (double)clock() / CLOCKS_PER_SEC;
#endif

      /* Fourier transform data, copy to work */
      for(mu=0; mu<4; mu++) {
        memcpy((void*)in, (void*)(work+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
        fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
        fftwnd_one(plan_m, in, NULL);
#endif
        memcpy((void*)(work+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));


        memcpy((void*)in, (void*)(work+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
        fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
        fftwnd_one(plan_p, in, NULL);
#endif
        memcpy((void*)(work+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));
      }  /* of mu =0 ,..., 3*/

      fnorm = 1. / (double)(T_global*LX*LY*LZ);
      if(g_cart_id==0) fprintf(stdout, "# P-fnorm = %e\n", fnorm);
      for(mu=0; mu<4; mu++) {
      for(nu=0; nu<4; nu++) {
        cp1 = (complex*)(work+_GWI(mu,0,VOLUME));
        cp2 = (complex*)(work+_GWI(4+nu,0,VOLUME));
        cp3 = (complex*)(work+_GWI(8+4*mu+nu,0,VOLUME));
     
        for(x0=0; x0<T; x0++) {
	  q[0] = (double)(x0+Tstart) / (double)T_global;
        for(x1=0; x1<LX; x1++) {
	  q[1] = (double)(x1) / (double)LX;
        for(x2=0; x2<LY; x2++) {
	  q[2] = (double)(x2) / (double)LY;
        for(x3=0; x3<LZ; x3++) {
	  q[3] = (double)(x3) / (double)LZ;
	  ix = g_ipt[x0][x1][x2][x3];
	  w.re = cos( M_PI * (q[mu]-q[nu]) );
	  w.im = sin( M_PI * (q[mu]-q[nu]) );
	  _co_eq_co_ti_co(&w1, cp1, cp2);
	  _co_eq_co_ti_co(cp3, &w1, &w);
	  _co_ti_eq_re(cp3, fnorm);
	  cp1++; cp2++; cp3++;
	}
	}
	}
	}

      }
      }
  
      /* save the result in momentum space */
      sprintf(filename, "cvc_hpe5_P.%.4d.%.4d", Nconf, count);
      sprintf(contype, "cvc-disc-all-hpe-05-P");
      write_lime_contraction(work+_GWI(8,0,VOLUME), filename, 64, 16, contype, Nconf, count);
/*
      sprintf(filename, "cvc_hpe5_Pascii.%.4d.%.4d", Nconf, count);
      write_contraction(work+_GWI(8,0,VOLUME), NULL, filename, 16, 2, 0);
*/
#ifdef MPI
      retime = MPI_Wtime();
#else
      retime = (double)clock() / CLOCKS_PER_SEC;
#endif
      if(g_cart_id==0) fprintf(stdout, "# time to save cvc results: %e seconds\n", retime-ratime);

    }  /* of count % Nsave == 0 */

  }  /* of loop on sid */

  /***********************************************
   * free the allocated memory, finalize 
   ***********************************************/
  free(g_gauge_field);
  for(i=0; i<no_fields; i++) free(g_spinor_field[i]);
  free(g_spinor_field);
  free_geometry();
  fftw_free(in);
  free(disc);

  free(work);

#ifdef MPI
  fftwnd_mpi_destroy_plan(plan_p);
  fftwnd_mpi_destroy_plan(plan_m);
  MPI_Finalize();
#else
  fftwnd_destroy_plan(plan_p);
  fftwnd_destroy_plan(plan_m);
#endif

  return(0);

}
Example #21
0
void test_planner(int rank)
{
     /* 
      * create and destroy many plans, at random.  Check the
      * garbage-collecting allocator of twiddle factors 
      */
     int i, dim;
     int r, s;
     fftw_plan p[PLANNER_TEST_SIZE];
     fftwnd_plan pnd[PLANNER_TEST_SIZE];
     int *narr, maxdim;

     chk_mem_leak = 0;
     verbose--;

     please_wait();
     if (rank < 1)
	  rank = 1;

     narr = (int *) fftw_malloc(rank * sizeof(int));

     maxdim = (int) pow(8192.0, 1.0/rank);

     for (i = 0; i < PLANNER_TEST_SIZE; ++i) {
	  p[i] = (fftw_plan) 0;
	  pnd[i] = (fftwnd_plan) 0;
     }

     for (i = 0; i < PLANNER_TEST_SIZE * PLANNER_TEST_SIZE; ++i) {
	  r = rand();
	  if (r < 0)
	       r = -r;
	  r = r % PLANNER_TEST_SIZE;

	  for (dim = 0; dim < rank; ++dim) {
	       do {
		    s = rand();
		    if (s < 0)
			 s = -s;
		    s = s % maxdim + 1;
	       } while (s == 0);
	       narr[dim] = s;
	  }

	  if (rank == 1) {
	       if (p[r])
		    fftw_destroy_plan(p[r]);

	       p[r] = fftw_create_plan(narr[0], random_dir(), measure_flag |
				       wisdom_flag);
	       if (paranoid && narr[0] < 200)
		    test_correctness(narr[0]);
	  }

	  if (pnd[r])
	       fftwnd_destroy_plan(pnd[r]);

	  pnd[r] = fftwnd_create_plan(rank, narr,
				      random_dir(), measure_flag |
				      wisdom_flag);

	  if (i % (PLANNER_TEST_SIZE * PLANNER_TEST_SIZE / 20) == 0) {
	       WHEN_VERBOSE(0, printf("test planner: so far so good\n"));
	       WHEN_VERBOSE(0, printf("test planner: iteration %d out of %d\n",
			      i, PLANNER_TEST_SIZE * PLANNER_TEST_SIZE));
	  }
     }

     for (i = 0; i < PLANNER_TEST_SIZE; ++i) {
	  if (p[i])
	       fftw_destroy_plan(p[i]);
	  if (pnd[i])
	       fftwnd_destroy_plan(pnd[i]);
     }

     fftw_free(narr);
     verbose++;
     chk_mem_leak = 1;
}
Example #22
0
void testnd_in_place(int rank, int *n, fftw_direction dir,
		     fftwnd_plan validated_plan,
		     int alternate_api, int specific, int force_buffered)
{
     int istride;
     int N, dim, i;
     fftw_complex *in1, *in2, *out2;
     fftwnd_plan p;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     if (force_buffered)
	  flags |= FFTWND_FORCE_BUFFERED;

     N = 1;
     for (dim = 0; dim < rank; ++dim)
	  N *= n[dim];

     in1 = (fftw_complex *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_complex));
     in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     if (!specific) {
	  if (alternate_api && (rank == 2 || rank == 3)) {
	       if (rank == 2)
		    p = fftw2d_create_plan(n[0], n[1], dir, flags);
	       else
		    p = fftw3d_create_plan(n[0], n[1], n[2], dir, flags);
	  } else		/* standard api */
	       p = fftwnd_create_plan(rank, n, dir, flags);
     } else {			/* specific plan creation */
	  if (alternate_api && (rank == 2 || rank == 3)) {
	       if (rank == 2)
		    p = fftw2d_create_plan_specific(n[0], n[1], dir, flags,
						    in1, 1,
					       (fftw_complex *) NULL, 1);
	       else
		    p = fftw3d_create_plan_specific(n[0], n[1], n[2], dir, flags,
						    in1, 1,
					       (fftw_complex *) NULL, 1);
	  } else		/* standard api */
	       p = fftwnd_create_plan_specific(rank, n, dir, flags,
					       in1, 1,
					       (fftw_complex *) NULL, 1);

     }

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
	  /* 
	   * generate random inputs */
	  for (i = 0; i < N; ++i) {
	       int j;
	       c_re(in2[i]) = DRAND();
	       c_im(in2[i]) = DRAND();
	       for (j = 0; j < istride; ++j) {
		    c_re(in1[i * istride + j]) = c_re(in2[i]);
		    c_im(in1[i * istride + j]) = c_im(in2[i]);
	       }
	  }

	  if (istride != 1 || istride != 1 || coinflip())
	       fftwnd(p, istride, in1, istride, 1, (fftw_complex *) NULL, 1, 1);
	  else
	       fftwnd_one(p, in1, NULL);

	  fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1);

	  for (i = 0; i < istride; ++i)
	       CHECK(compute_error_complex(in1 + i, istride, out2, 1, N) < TOLERANCE,
		     "testnd_in_place: wrong answer");
     }

     fftwnd_destroy_plan(p);

     fftw_free(out2);
     fftw_free(in2);
     fftw_free(in1);
}
Example #23
0
void rfftwnd_destroy_plan(fftwnd_plan plan)
{
     fftwnd_destroy_plan(plan);
}
Example #24
0
//------------------------------------------------------------------------------------
int fdct3d_inverse_angles(int N1,int N2,int N3,int b, double L1,double L2,double L3, int s,int nd,
							  CpxCrvletPrtd& C, CpxNumTnsBlkd& W)
{
  int mpirank;  MPI_Comm_rank(MPI_COMM_WORLD, &mpirank);
  int mpisize;  MPI_Comm_size(MPI_COMM_WORLD, &mpisize);
  vector< vector<int> >& Cowners = C.owners();
  vector<int>& crvowners = Cowners[s]; //LEXING: the owner information for wedges in scale s
  
  int nf = 6;
  int wcnt = 0;
  int S1, S2, S3;	 int F1, F2, F3;	 double R1, R2, R3;	 fdct3d_rangecompute(L1, L2, L3, S1, S2, S3, F1, F2, F3, R1, R2, R3);
  DblOffVec big1(S1);  fdct3d_lowpass(L1, big1);
  DblOffVec big2(S2);  fdct3d_lowpass(L2, big2);
  DblOffVec big3(S3);  fdct3d_lowpass(L3, big3);
  
  double Lh1 = L1/2;  double Lh2 = L2/2;  double Lh3 = L3/2;
  int Sh1, Sh2, Sh3;	 int Fh1, Fh2, Fh3;	 double Rh1, Rh2, Rh3;	 fdct3d_rangecompute(Lh1, Lh2, Lh3, Sh1, Sh2, Sh3, Fh1, Fh2, Fh3, Rh1, Rh2, Rh3);
  DblOffVec sma1(S1);  fdct3d_lowpass(Lh1, sma1);
  DblOffVec sma2(S2);  fdct3d_lowpass(Lh2, sma2);
  DblOffVec sma3(S3);  fdct3d_lowpass(Lh3, sma3);
  
  double W1 = L1/nd;  double W2 = L2/nd;  double W3 = L3/nd;
  
  typedef pair<int,int> intpair;  typedef pair<int, intpair> inttriple;
  map<inttriple, fftwnd_plan> planmap;

  //face 0: x,y,z
  for(int h=0; h<nd; h++) { //(y first z second)
	 for(int g=0; g<nd; g++) {
		if(crvowners[wcnt]==mpirank) {
		  double xs = R1/4-(W1/2)/4;		double xe = R1;
		  double ys = -R2 + (2*g-1)*W2/2;		double ye = -R2 + (2*g+3)*W2/2;
		  double zs = -R3 + (2*h-1)*W3/2;		double ze = -R3 + (2*h+3)*W3/2;
		  int xn = int(ceil(xe-xs));		  int yn = int(ceil(ye-ys));		  int zn = int(ceil(ze-zs));
		  double thts, thtm, thte; //y to x
		  if(g==0) {
			 thts = atan2(-1.0, 1.0-1.0/nd);			 thtm = atan2(-1.0+1.0/nd, 1.0);			 thte = atan2(-1.0+3.0/nd, 1.0);
		  } else if(g==nd-1) {
			 thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);			 thte = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);			 thte = atan2(-1.0+(2.0*g+3.0)/nd, 1.0);
		  }
		  double phis, phim, phie; //z to x
		  if(h==0) {
			 phis = atan2(-1.0, 1.0-1.0/nd);			 phim = atan2(-1.0+1.0/nd, 1.0);			 phie = atan2(-1.0+3.0/nd, 1.0);
		  } else if(h==nd-1) {
			 phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);			 phie = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);			 phie = atan2(-1.0+(2.0*h+3.0)/nd, 1.0);
		  }
		  int xh = xn/2;		  int yh = yn/2;		  int zh = zn/2; //half
		  double R21 = R2/R1;		  double R31 = R3/R1;
		  
		  CpxNumTns tpdata(xn,yn,zn);
		  CpxNumTns& Cblk = C.block(s,wcnt);
		  tpdata = Cblk;
		  //fft
		  fftwnd_plan p = NULL;
		  map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) );
		  if(mit!=planmap.end()) {			 p = (*mit).second;
		  } else {
			 p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
			 planmap[ inttriple(xn, intpair(yn,zn)) ] = p;
		  }
		  fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL);		  //cerr<<"wedge s"<<endl;
		  double sqrtprod = sqrt(double(xn*yn*zn));
		  for(int i=0; i<xn; i++)			 for(int j=0; j<yn; j++)				for(int k=0; k<zn; k++)				  tpdata(i,j,k) /= sqrtprod;
		  CpxOffTns wpdata(xn,yn,zn);
		  fdct3d_fftshift(xn,yn,zn,tpdata,wpdata);
		  
		  for(int xcur=(int)ceil(xs); xcur<xe; xcur++) {
			 int yfm = (int)ceil( max(-R2, R21*xcur*tan(thts)) );
			 int yto = (int)floor( min(R2, R21*xcur*tan(thte)) );
			 int zfm = (int)ceil( max(-R3, R31*xcur*tan(phis)) );
			 int zto = (int)floor( min(R3, R31*xcur*tan(phie)) );
			 for(int ycur=yfm; ycur<=yto; ycur++)
				for(int zcur=zfm; zcur<=zto; zcur++) {
				  int tmpx = xcur%xn;				  if(tmpx<-xh) tmpx+=xn;				  if(tmpx>=-xh+xn) tmpx-=xn;
				  int tmpy = ycur%yn;				  if(tmpy<-yh) tmpy+=yn;				  if(tmpy>=-yh+yn) tmpy-=yn;
				  int tmpz = zcur%zn;				  if(tmpz<-zh) tmpz+=zn;				  if(tmpz>=-zh+zn) tmpz-=zn;
				  
				  double thtcur = atan2(ycur/R2, xcur/R1);
				  double phicur = atan2(zcur/R3, xcur/R1);
				  double glbpou;					 fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou);
				  double wtht;
				  if(thtcur<thtm) {
					 if(g==0)						wtht = 1;
					 else {						double l,r;						fdct3d_window( (thtcur-thts)/(thtm-thts), l, r);						wtht = l;					 }
				  } else {
					 if(g==nd-1)						wtht = 1;
					 else {						double l,r;						  fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r);						wtht = r;					 }
				  }
				  double wphi;
				  if(phicur<phim) {
					 if(h==0)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phis)/(phim-phis), l, r);						wphi = l;					 }
				  } else {
					 if(h==nd-1)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phim)/(phie-phim), l, r);						wphi = r;					 }
				  }
				  double pou = glbpou * wtht * wphi;
				  wpdata(tmpx, tmpy, tmpz) *= pou;
				  
				  double ss = sma1(xcur)*sma2(ycur)*sma3(zcur);				  double bb = big1(xcur)*big2(ycur)*big3(zcur);
				  int bi,bj,bk;			 int oi,oj,ok;			 fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok);
				  CpxNumTns& Wblk = W.block(bi,bj,bk);
				  Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz)  * bb * sqrt(1.0-ss*ss);
				}
		  } //xcur
		} //if
		wcnt++;
	 }
  } //end of face
  //face 1. y z x
  for(int f=0; f<nd; f++) {
	 for(int h=0; h<nd; h++) {
		if(crvowners[wcnt]==mpirank) {
		  double ys = R2/4-(W2/2)/4;		  double ye = R2;
		  double zs = -R3 + (2*h-1)*W3/2;		  double ze = -R3 + (2*h+3)*W3/2;
		  double xs = -R1 + (2*f-1)*W1/2;		  double xe = -R1 + (2*f+3)*W1/2;
		  int xn = int(ceil(xe-xs));		  int yn = int(ceil(ye-ys));		  int zn = int(ceil(ze-zs));
		  double thts, thtm, thte; //z to y
		  if(h==0) {
			 thts = atan2(-1.0, 1.0-1.0/nd);			 thtm = atan2(-1.0+1.0/nd, 1.0);			 thte = atan2(-1.0+3.0/nd, 1.0);
		  } else if(h==nd-1) {
			 thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);			 thte = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);			 thte = atan2(-1.0+(2.0*h+3.0)/nd, 1.0);
		  }
		  double phis, phim, phie; //z to x
		  if(f==0) {
			 phis = atan2(-1.0, 1.0-1.0/nd);			 phim = atan2(-1.0+1.0/nd, 1.0);			 phie = atan2(-1.0+3.0/nd, 1.0);
		  } else if(f==nd-1) {
			 phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);			 phie = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);			 phie = atan2(-1.0+(2.0*f+3.0)/nd, 1.0);
		  }
		  int xh = xn/2;		  int yh = yn/2;		  int zh = zn/2;
		  double R32 = R3/R2;		  double R12 = R1/R2;
		  
		  CpxNumTns tpdata(xn,yn,zn);
		  CpxNumTns& Cblk = C.block(s,wcnt);
		  tpdata = Cblk;
		  //fft
		  fftwnd_plan p = NULL;
		  map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) );
		  if(mit!=planmap.end()) {			 p = (*mit).second;
		  } else {
			 p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
			 planmap[ inttriple(xn, intpair(yn,zn)) ] = p;
		  }
		  fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL);		  //cerr<<"wedge s"<<endl;
		  double sqrtprod = sqrt(double(xn*yn*zn));
		  for(int i=0; i<xn; i++)			 for(int j=0; j<yn; j++)				for(int k=0; k<zn; k++)				  tpdata(i,j,k) /= sqrtprod;
		  CpxOffTns wpdata(xn,yn,zn);
		  fdct3d_fftshift(xn,yn,zn,tpdata,wpdata);

		  for(int ycur=(int)ceil(ys); ycur<ye; ycur++) {
			 int zfm = (int)ceil( max(-R3, R32*ycur*tan(thts)) );
			 int zto = (int)floor( min(R3, R32*ycur*tan(thte)) );
			 int xfm = (int)ceil( max(-R1, R12*ycur*tan(phis)) );
			 int xto = (int)floor( min(R1, R12*ycur*tan(phie)) );
			 for(int zcur=zfm; zcur<=zto; zcur++)
				for(int xcur=xfm; xcur<=xto; xcur++) {
				  int tmpx = xcur%xn;				  if(tmpx<-xh) tmpx+=xn;				  if(tmpx>=-xh+xn) tmpx-=xn;
				  int tmpy = ycur%yn;				  if(tmpy<-yh) tmpy+=yn;				  if(tmpy>=-yh+yn) tmpy-=yn;
				  int tmpz = zcur%zn;				  if(tmpz<-zh) tmpz+=zn;				  if(tmpz>=-zh+zn) tmpz-=zn;
				  
				  double thtcur = atan2(zcur/R3, ycur/R2);
				  double phicur = atan2(xcur/R1, ycur/R2);
				  double glbpou;					 fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou); //CHECK
				  double wtht;
				  if(thtcur<thtm) {
					 if(h==0)						wtht = 1;
					 else {						double l,r;						fdct3d_window( (thtcur-thts)/(thtm-thts), l, r);						wtht = l;					 }
				  } else {
					 if(h==nd-1)						wtht = 1;
					 else {						double l,r;						  fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r);						wtht = r;					 }
				  }
				  double wphi;
				  if(phicur<phim) {
					 if(f==0)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phis)/(phim-phis), l, r);						wphi = l;					 }
				  } else {
					 if(f==nd-1)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phim)/(phie-phim), l, r);						wphi = r;					 }
				  }
				  double pou = glbpou * wtht * wphi;
				  wpdata(tmpx, tmpy, tmpz) *= pou;
				  
				  double ss = sma1(xcur)*sma2(ycur)*sma3(zcur);				  double bb = big1(xcur)*big2(ycur)*big3(zcur);
				  int bi,bj,bk;			 int oi,oj,ok;			 fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok);
				  CpxNumTns& Wblk = W.block(bi,bj,bk);
				  Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz)  * bb * sqrt(1.0-ss*ss);
				}
		  } //ycur
		}//if
		wcnt++;
	 }
  }//end of face
  //face 2. z x y
  for(int g=0; g<nd; g++) {
	 for(int f=0; f<nd; f++) {
		if(crvowners[wcnt]==mpirank) {
		  double zs = R3/4-(W3/2)/4;		double ze = R3;
		  double xs = -R1 + (2*f-1)*W1/2;		double xe = -R1 + (2*f+3)*W1/2;
		  double ys = -R2 + (2*g-1)*W2/2;		double ye = -R2 + (2*g+3)*W2/2;
		  int xn = int(ceil(xe-xs));		  int yn = int(ceil(ye-ys));		  int zn = int(ceil(ze-zs));
		  double thts, thtm, thte; //y to x
		  if(f==0) {
			 thts = atan2(-1.0, 1.0-1.0/nd);			 thtm = atan2(-1.0+1.0/nd, 1.0);			 thte = atan2(-1.0+3.0/nd, 1.0);
		  } else if(f==nd-1) {
			 thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);			 thte = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);			 thte = atan2(-1.0+(2.0*f+3.0)/nd, 1.0);
		  }
		  double phis, phim, phie; //z to x
		  if(g==0) {
			 phis = atan2(-1.0, 1.0-1.0/nd);			 phim = atan2(-1.0+1.0/nd, 1.0);			 phie = atan2(-1.0+3.0/nd, 1.0);
		  } else if(g==nd-1) {
			 phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);			 phie = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);			 phie = atan2(-1.0+(2.0*g+3.0)/nd, 1.0);
		  }
		  int xh = xn/2;		  int yh = yn/2;		  int zh = zn/2;
		  double R13 = double(F1)/double(F3);		  double R23 = double(F2)/double(F3);

		  CpxNumTns tpdata(xn,yn,zn);
		  CpxNumTns& Cblk = C.block(s,wcnt);
		  tpdata = Cblk;
		  //fft
		  fftwnd_plan p = NULL;
		  map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) );
		  if(mit!=planmap.end()) {			 p = (*mit).second;
		  } else {
			 p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
			 planmap[ inttriple(xn, intpair(yn,zn)) ] = p;
		  }
		  fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL);		  //cerr<<"wedge s"<<endl;
		  double sqrtprod = sqrt(double(xn*yn*zn));
		  for(int i=0; i<xn; i++)			 for(int j=0; j<yn; j++)				for(int k=0; k<zn; k++)				  tpdata(i,j,k) /= sqrtprod;
		  CpxOffTns wpdata(xn,yn,zn);
		  fdct3d_fftshift(xn,yn,zn,tpdata,wpdata);
		  
		  for(int zcur=(int)ceil(zs); zcur<ze; zcur++) {
			 int xfm = (int)ceil( max(-R1, R13*zcur*tan(thts)) );
			 int xto = (int)floor( min(R1, R13*zcur*tan(thte)) );
			 int yfm = (int)ceil( max(-R2, R23*zcur*tan(phis)) );
			 int yto = (int)floor( min(R2, R23*zcur*tan(phie)) );
			 for(int xcur=xfm; xcur<=xto; xcur++)
				for(int ycur=yfm; ycur<=yto; ycur++) {
				  int tmpx = xcur%xn;				  if(tmpx<-xh) tmpx+=xn;				  if(tmpx>=-xh+xn) tmpx-=xn;
				  int tmpy = ycur%yn;				  if(tmpy<-yh) tmpy+=yn;				  if(tmpy>=-yh+yn) tmpy-=yn;
				  int tmpz = zcur%zn;				  if(tmpz<-zh) tmpz+=zn;				  if(tmpz>=-zh+zn) tmpz-=zn;
				  
				  double thtcur = atan2(xcur/R1, zcur/R3);
				  double phicur = atan2(ycur/R2, zcur/R3);
				  double glbpou;					 fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou);
				  double wtht;
				  if(thtcur<thtm) {
					 if(f==0)						wtht = 1;
					 else {						double l,r;						fdct3d_window( (thtcur-thts)/(thtm-thts), l, r);						wtht = l;					 }
				  } else {
					 if(f==nd-1)						wtht = 1;
					 else {						double l,r;						  fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r);						wtht = r;					 }
				  }
				  double wphi;
				  if(phicur<phim) {
					 if(g==0)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phis)/(phim-phis), l, r);						wphi = l;					 }
				  } else {
					 if(g==nd-1)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phim)/(phie-phim), l, r);						wphi = r;					 }
				  }
				  double pou = glbpou * wtht * wphi;
				  wpdata(tmpx, tmpy, tmpz) *= pou;
				  
				  double ss = sma1(xcur)*sma2(ycur)*sma3(zcur);				  double bb = big1(xcur)*big2(ycur)*big3(zcur);
				  int bi,bj,bk;			 int oi,oj,ok;			 fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok);
				  CpxNumTns& Wblk = W.block(bi,bj,bk);
				  Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz)  * bb * sqrt(1.0-ss*ss);
				}
		  }//zcur
		}//if
		wcnt++;
	 }
  }//end of face
  //face 3: -x,-y,-z
  for(int h=nd-1; h>=0; h--) {
	 for(int g=nd-1; g>=0; g--) {
		if(crvowners[wcnt]==mpirank) {
		  double xs = -R1;		  double xe = -R1/4+(W1/2)/4;
		  double ys = -R2 + (2*g-1)*W2/2;		double ye = -R2 + (2*g+3)*W2/2;
		  double zs = -R3 + (2*h-1)*W3/2;		double ze = -R3 + (2*h+3)*W3/2;
		  int xn = int(ceil(xe-xs));		  int yn = int(ceil(ye-ys));		  int zn = int(ceil(ze-zs));
		  double thts, thtm, thte; //y to x
		  if(g==0) {
			 thts = atan2(-1.0, 1.0-1.0/nd);			 thtm = atan2(-1.0+1.0/nd, 1.0);			 thte = atan2(-1.0+3.0/nd, 1.0);
		  } else if(g==nd-1) {
			 thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);			 thte = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);			 thte = atan2(-1.0+(2.0*g+3.0)/nd, 1.0);
		  }
		  double phis, phim, phie; //z to x
		  if(h==0) {
			 phis = atan2(-1.0, 1.0-1.0/nd);			 phim = atan2(-1.0+1.0/nd, 1.0);			 phie = atan2(-1.0+3.0/nd, 1.0);
		  } else if(h==nd-1) {
			 phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);			 phie = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);			 phie = atan2(-1.0+(2.0*h+3.0)/nd, 1.0);
		  }
		  int xh = xn/2;		  int yh = yn/2;		  int zh = zn/2;
		  double R21 = R2/R1;		  double R31 = R3/R1;
		  
		  CpxNumTns tpdata(xn,yn,zn);
		  CpxNumTns& Cblk = C.block(s,wcnt);
		  tpdata = Cblk;
		  //fft
		  fftwnd_plan p = NULL;
		  map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) );
		  if(mit!=planmap.end()) {			 p = (*mit).second;
		  } else {
			 p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
			 planmap[ inttriple(xn, intpair(yn,zn)) ] = p;
		  }
		  fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL);		  //cerr<<"wedge s"<<endl;
		  double sqrtprod = sqrt(double(xn*yn*zn));
		  for(int i=0; i<xn; i++)			 for(int j=0; j<yn; j++)				for(int k=0; k<zn; k++)				  tpdata(i,j,k) /= sqrtprod;
		  CpxOffTns wpdata(xn,yn,zn);
		  fdct3d_fftshift(xn,yn,zn,tpdata,wpdata);
		  
		  for(int xcur=(int)ceil(xs); xcur<xe; xcur++) {
			 int yfm = (int)ceil( max(-R2, R21*(-xcur)*tan(thts)) );
			 int yto = (int)floor( min(R2, R21*(-xcur)*tan(thte)) );
			 int zfm = (int)ceil( max(-R3, R31*(-xcur)*tan(phis)) );
			 int zto = (int)floor( min(R3, R31*(-xcur)*tan(phie)) );
			 for(int ycur=yfm; ycur<=yto; ycur++)
				for(int zcur=zfm; zcur<=zto; zcur++) {
				  int tmpx = xcur%xn;				  if(tmpx<-xh) tmpx+=xn;				  if(tmpx>=-xh+xn) tmpx-=xn;
				  int tmpy = ycur%yn;				  if(tmpy<-yh) tmpy+=yn;				  if(tmpy>=-yh+yn) tmpy-=yn;
				  int tmpz = zcur%zn;				  if(tmpz<-zh) tmpz+=zn;				  if(tmpz>=-zh+zn) tmpz-=zn;
				  
				  double thtcur = atan2(ycur/R2, (-xcur)/R1);
				  double phicur = atan2(zcur/R3, (-xcur)/R1);
				  double glbpou;					 fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou);
				  double wtht;
				  if(thtcur<thtm) {
					 if(g==0)						wtht = 1;
					 else {						double l,r;						fdct3d_window( (thtcur-thts)/(thtm-thts), l, r);						wtht = l;					 }
				  } else {
					 if(g==nd-1)						wtht = 1;
					 else {						double l,r;						  fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r);						wtht = r;					 }
				  }
				  double wphi;
				  if(phicur<phim) {
					 if(h==0)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phis)/(phim-phis), l, r);						wphi = l;					 }
				  } else {
					 if(h==nd-1)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phim)/(phie-phim), l, r);						wphi = r;					 }
				  }
				  double pou = glbpou * wtht * wphi;
				  wpdata(tmpx, tmpy, tmpz) *= pou;
				  
				  double ss = sma1(xcur)*sma2(ycur)*sma3(zcur);				  double bb = big1(xcur)*big2(ycur)*big3(zcur);
				  int bi,bj,bk;			 int oi,oj,ok;			 fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok);
				  CpxNumTns& Wblk = W.block(bi,bj,bk);
				  Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz)  * bb * sqrt(1.0-ss*ss);
				}
		  } //xcur
		} //if
		wcnt++;
	 }
  } //end of face
  //face 4: -y,-z,-x
  for(int f=nd-1; f>=0; f--) {
	 for(int h=nd-1; h>=0; h--) {
		if(crvowners[wcnt]==mpirank) {
		  double ys = -R2;		  double ye = -R2/4+(W2/2)/4;
		  double zs = -R3 + (2*h-1)*W3/2;		  double ze = -R3 + (2*h+3)*W3/2;
		  double xs = -R1 + (2*f-1)*W1/2;		  double xe = -R1 + (2*f+3)*W1/2;
		  int xn = int(ceil(xe-xs));		  int yn = int(ceil(ye-ys));		  int zn = int(ceil(ze-zs));
		  double thts, thtm, thte; //z to y
		  if(h==0) {
			 thts = atan2(-1.0, 1.0-1.0/nd);			 thtm = atan2(-1.0+1.0/nd, 1.0);			 thte = atan2(-1.0+3.0/nd, 1.0);
		  } else if(h==nd-1) {
			 thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);			 thte = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);			 thte = atan2(-1.0+(2.0*h+3.0)/nd, 1.0);
		  }
		  double phis, phim, phie; //z to x
		  if(f==0) {
			 phis = atan2(-1.0, 1.0-1.0/nd);			 phim = atan2(-1.0+1.0/nd, 1.0);			 phie = atan2(-1.0+3.0/nd, 1.0);
		  } else if(f==nd-1) {
			 phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);			 phie = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);			 phie = atan2(-1.0+(2.0*f+3.0)/nd, 1.0);
		  }
		  int xh = xn/2;		  int yh = yn/2;		  int zh = zn/2;
		  double R32 = double(F3)/double(F2);		  double R12 = double(F1)/double(F2);

		  CpxNumTns tpdata(xn,yn,zn);
		  CpxNumTns& Cblk = C.block(s,wcnt);
		  tpdata = Cblk;
		  //fft
		  fftwnd_plan p = NULL;
		  map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) );
		  if(mit!=planmap.end()) {			 p = (*mit).second;
		  } else {
			 p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
			 planmap[ inttriple(xn, intpair(yn,zn)) ] = p;
		  }
		  fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL);		  //cerr<<"wedge s"<<endl;
		  double sqrtprod = sqrt(double(xn*yn*zn));
		  for(int i=0; i<xn; i++)			 for(int j=0; j<yn; j++)				for(int k=0; k<zn; k++)				  tpdata(i,j,k) /= sqrtprod;
		  CpxOffTns wpdata(xn,yn,zn);
		  fdct3d_fftshift(xn,yn,zn,tpdata,wpdata);
		  
		  for(int ycur=(int)ceil(ys); ycur<ye; ycur++) {
			 int zfm = (int)ceil( max(-R3, R32*(-ycur)*tan(thts)) );
			 int zto = (int)floor( min(R3, R32*(-ycur)*tan(thte)) );
			 int xfm = (int)ceil( max(-R1, R12*(-ycur)*tan(phis)) );
			 int xto = (int)floor( min(R1, R12*(-ycur)*tan(phie)) );
			 for(int zcur=zfm; zcur<=zto; zcur++)
				for(int xcur=xfm; xcur<=xto; xcur++) {
				  int tmpx = xcur%xn;				  if(tmpx<-xh) tmpx+=xn;				  if(tmpx>=-xh+xn) tmpx-=xn;
				  int tmpy = ycur%yn;				  if(tmpy<-yh) tmpy+=yn;				  if(tmpy>=-yh+yn) tmpy-=yn;
				  int tmpz = zcur%zn;				  if(tmpz<-zh) tmpz+=zn;				  if(tmpz>=-zh+zn) tmpz-=zn;
				  
				  double thtcur = atan2(zcur/R3, (-ycur)/R2);
				  double phicur = atan2(xcur/R1, (-ycur)/R2);
				  double glbpou;					 fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou); //CHECK
				  double wtht;
				  if(thtcur<thtm) {
					 if(h==0)						wtht = 1;
					 else {						double l,r;						fdct3d_window( (thtcur-thts)/(thtm-thts), l, r);						wtht = l;					 }
				  } else {
					 if(h==nd-1)						wtht = 1;
					 else {						double l,r;						  fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r);						wtht = r;					 }
				  }
				  double wphi;
				  if(phicur<phim) {
					 if(f==0)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phis)/(phim-phis), l, r);						wphi = l;					 }
				  } else {
					 if(f==nd-1)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phim)/(phie-phim), l, r);						wphi = r;					 }
				  }
				  double pou = glbpou * wtht * wphi;
				  wpdata(tmpx, tmpy, tmpz) *= pou;
				  
				  double ss = sma1(xcur)*sma2(ycur)*sma3(zcur);				  double bb = big1(xcur)*big2(ycur)*big3(zcur);
				  int bi,bj,bk;			 int oi,oj,ok;			 fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok);
				  CpxNumTns& Wblk = W.block(bi,bj,bk);
				  Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz)  * bb * sqrt(1.0-ss*ss);
				}
		  } //ycur
		}//if
		wcnt++;
	 }
  }//end of face
  //face 5.-z,-x,-y
  for(int g=nd-1; g>=0; g--) {
	 for(int f=nd-1; f>=0; f--) {
		if(crvowners[wcnt]==mpirank) {
		  double zs = -R3;		  double ze = -R3/4+(W3/2)/4;
		  double xs = -R1 + (2*f-1)*W1/2;		double xe = -R1 + (2*f+3)*W1/2;
		  double ys = -R2 + (2*g-1)*W2/2;		double ye = -R2 + (2*g+3)*W2/2;
		  int xn = int(ceil(xe-xs));		  int yn = int(ceil(ye-ys));		  int zn = int(ceil(ze-zs));
		  double thts, thtm, thte; //y to x
		  if(f==0) {
			 thts = atan2(-1.0, 1.0-1.0/nd);			 thtm = atan2(-1.0+1.0/nd, 1.0);			 thte = atan2(-1.0+3.0/nd, 1.0);
		  } else if(f==nd-1) {
			 thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);			 thte = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);			 thte = atan2(-1.0+(2.0*f+3.0)/nd, 1.0);
		  }
		  double phis, phim, phie; //z to x
		  if(g==0) {
			 phis = atan2(-1.0, 1.0-1.0/nd);			 phim = atan2(-1.0+1.0/nd, 1.0);			 phie = atan2(-1.0+3.0/nd, 1.0);
		  } else if(g==nd-1) {
			 phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);			 phie = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);			 phie = atan2(-1.0+(2.0*g+3.0)/nd, 1.0);
		  }
		  int xh = xn/2;		  int yh = yn/2;		  int zh = zn/2;
		  double R13 = double(F1)/double(F3);		  double R23 = double(F2)/double(F3);
		  
		  CpxNumTns tpdata(xn,yn,zn);
		  CpxNumTns& Cblk = C.block(s,wcnt);
		  tpdata = Cblk;
		  //fft
		  fftwnd_plan p = NULL;
		  map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) );
		  if(mit!=planmap.end()) {			 p = (*mit).second;
		  } else {
			 p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
			 planmap[ inttriple(xn, intpair(yn,zn)) ] = p;
		  }
		  fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL);		  //cerr<<"wedge s"<<endl;
		  double sqrtprod = sqrt(double(xn*yn*zn));
		  for(int i=0; i<xn; i++)			 for(int j=0; j<yn; j++)				for(int k=0; k<zn; k++)				  tpdata(i,j,k) /= sqrtprod;
		  CpxOffTns wpdata(xn,yn,zn);
		  fdct3d_fftshift(xn,yn,zn,tpdata,wpdata);
		  
		  for(int zcur=(int)ceil(zs); zcur<ze; zcur++) {
			 int xfm = (int)ceil( max(-R1, R13*(-zcur)*tan(thts)) );
			 int xto = (int)floor( min(R1, R13*(-zcur)*tan(thte)) );
			 int yfm = (int)ceil( max(-R2, R23*(-zcur)*tan(phis)) );
			 int yto = (int)floor( min(R2, R23*(-zcur)*tan(phie)) );
			 for(int xcur=xfm; xcur<=xto; xcur++)
				for(int ycur=yfm; ycur<=yto; ycur++) {
				  int tmpx = xcur%xn;				  if(tmpx<-xh) tmpx+=xn;				  if(tmpx>=-xh+xn) tmpx-=xn;
				  int tmpy = ycur%yn;				  if(tmpy<-yh) tmpy+=yn;				  if(tmpy>=-yh+yn) tmpy-=yn;
				  int tmpz = zcur%zn;				  if(tmpz<-zh) tmpz+=zn;				  if(tmpz>=-zh+zn) tmpz-=zn;
				  
				  double thtcur = atan2(xcur/R1, (-zcur)/R3);
				  double phicur = atan2(ycur/R2, (-zcur)/R3);
				  double glbpou;					 fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou);
				  double wtht;
				  if(thtcur<thtm) {
					 if(f==0)						wtht = 1;
					 else {						double l,r;						fdct3d_window( (thtcur-thts)/(thtm-thts), l, r);						wtht = l;					 }
				  } else {
					 if(f==nd-1)						wtht = 1;
					 else {						double l,r;						  fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r);						wtht = r;					 }
				  }
				  double wphi;
				  if(phicur<phim) {
					 if(g==0)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phis)/(phim-phis), l, r);						wphi = l;					 }
				  } else {
					 if(g==nd-1)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phim)/(phie-phim), l, r);						wphi = r;					 }
				  }
				  double pou = glbpou * wtht * wphi;
				  wpdata(tmpx, tmpy, tmpz) *= pou;
				  				  
				  double ss = sma1(xcur)*sma2(ycur)*sma3(zcur);				  double bb = big1(xcur)*big2(ycur)*big3(zcur);
				  int bi,bj,bk;			 int oi,oj,ok;			 fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok);
				  CpxNumTns& Wblk = W.block(bi,bj,bk);
				  Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz)  * bb * sqrt(1.0-ss*ss);
				}
		  }//zcur
		}//if
		wcnt++;
	 }
  }//end of face
  iA(wcnt==nd*nd*nf);
  
  //remove plans
  for(map<inttriple, fftwnd_plan>::iterator mit=planmap.begin(); mit!=planmap.end(); mit++) {
	 fftwnd_plan p = (*mit).second;
	 fftwnd_destroy_plan(p);
  }
  
  return 0;
}
Example #25
0
int main(int argc, char **argv) {
  
  const int n_c=3;
  const int n_s=4;
  const char outfile_prefix[] = "delta_pp_2pt_v3";

  int c, i, icomp;
  int filename_set = 0;
  int append, status;
  int l_LX_at, l_LXstart_at;
  int ix, it, iix, x1,x2,x3;
  int ir, ir2, is;
  int VOL3;
  int do_gt=0;
  int dims[3];
  double *connt=NULL;
  spinor_propagator_type *connq=NULL;
  int verbose = 0;
  int sx0, sx1, sx2, sx3;
  int write_ascii=0;
  int fermion_type = 1;  // Wilson fermion type
  int num_threads=1;
  int pos;
  char filename[200], contype[200], gauge_field_filename[200];
  double ratime, retime;
  //double plaq_m, plaq_r;
  double *work=NULL;
  fermion_propagator_type fp1=NULL, fp2=NULL, fp3=NULL, fp4=NULL, fpaux=NULL, uprop=NULL, dprop=NULL, *stochastic_fp=NULL;
  spinor_propagator_type sp1, sp2;
  double q[3], phase, *gauge_trafo=NULL;
  double *stochastic_source=NULL, *stochastic_prop=NULL;
  complex w, w1;
  size_t items, bytes;
  FILE *ofs;
  int timeslice;
  DML_Checksum ildg_gauge_field_checksum, *spinor_field_checksum=NULL, connq_checksum;
  uint32_t nersc_gauge_field_checksum;

/***********************************************************/
  int *qlatt_id=NULL, *qlatt_count=NULL, **qlatt_rep=NULL, **qlatt_map=NULL, qlatt_nclass=0;
  int use_lattice_momenta = 0;
  double **qlatt_list=NULL;
/***********************************************************/

/***********************************************************/
  int rel_momentum_filename_set = 0, rel_momentum_no=0;
  int **rel_momentum_list=NULL;
  char rel_momentum_filename[200];
/***********************************************************/

/***********************************************************/
  int snk_momentum_no = 1;
  int **snk_momentum_list = NULL;
  int snk_momentum_filename_set = 0;
  char snk_momentum_filename[200];
/***********************************************************/

/*******************************************************************
 * Gamma components for the Delta:
 */
  //const int num_component = 16;
  //int gamma_component[2][16] = { {0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3}, \
  //                               {0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3}};
  //double gamma_component_sign[16] = {1., 1.,-1., 1., 1., 1.,-1., 1.,-1.,-1., 1.,-1., 1., 1.,-1., 1.};
  const int num_component = 4;
  int gamma_component[2][4] = { {0, 1, 2, 3},
                                {0, 1, 2, 3} };
  double gamma_component_sign[4] = {+1.,+1.,+1.,+1.};
/*
 *******************************************************************/
  fftw_complex *in=NULL;
#ifdef MPI
   fftwnd_mpi_plan plan_p;
#else
   fftwnd_plan plan_p;
#endif 

#ifdef MPI
  MPI_Status status;
#endif

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "ah?vgf:t:F:p:P:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'a':
      write_ascii = 1;
      fprintf(stdout, "# [] will write in ascii format\n");
      break;
    case 'F':
      if(strcmp(optarg, "Wilson") == 0) {
        fermion_type = _WILSON_FERMION;
      } else if(strcmp(optarg, "tm") == 0) {
        fermion_type = _TM_FERMION;
      } else {
        fprintf(stderr, "[] Error, unrecognized fermion type\n");
        exit(145);
      }
      fprintf(stdout, "# [] will use fermion type %s ---> no. %d\n", optarg, fermion_type);
      break;
    case 't':
      num_threads = atoi(optarg);
      fprintf(stdout, "# [] number of threads set to %d\n", num_threads);
      break;
    case 's':
      use_lattice_momenta = 1;
      fprintf(stdout, "# [] will use lattice momenta\n");
      break;
    case 'p':
      rel_momentum_filename_set = 1;
      strcpy(rel_momentum_filename, optarg);
      fprintf(stdout, "# [] will use current momentum file %s\n", rel_momentum_filename);
      break;
    case 'P':
      snk_momentum_filename_set = 1;
      strcpy(snk_momentum_filename, optarg);
      fprintf(stdout, "# [] will use nucleon momentum file %s\n", snk_momentum_filename);
      break;
    case 'g':
      do_gt = 1;
      fprintf(stdout, "# [] will perform gauge transform\n");
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

#ifdef OPENMP
  omp_set_num_threads(num_threads);
#endif

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n");
    usage();
  }

  /* initialize MPI parameters */
  mpi_init(argc, argv);

#ifdef OPENMP
  status = fftw_threads_init();
  if(status != 0) {
    fprintf(stderr, "\n[] Error from fftw_init_threads; status was %d\n", status);
    exit(120);
  }
#endif

  /******************************************************
   *
   ******************************************************/
  VOL3 = LX*LY*LZ;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
  fprintf(stdout, "# [%2d] parameters:\n"\
		  "# [%2d] l_LX_at      = %3d\n"\
		  "# [%2d] l_LXstart_at = %3d\n"\
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
    exit(1);
  }

  geometry();

  if(N_Jacobi>0) {

    // alloc the gauge field
    alloc_gauge_field(&g_gauge_field, VOL3);
    switch(g_gauge_file_format) {
      case 0:
        sprintf(gauge_field_filename, "%s.%.4d", gaugefilename_prefix, Nconf);
        break;
      case 1:
        sprintf(gauge_field_filename, "%s.%.5d", gaugefilename_prefix, Nconf);
        break;
    }
  } else {
    g_gauge_field = NULL;
  }


  /*********************************************************************
   * gauge transformation
   *********************************************************************/
  if(do_gt) { init_gauge_trafo(&gauge_trafo, 1.); }

  // determine the source location
  sx0 = g_source_location/(LX*LY*LZ)-Tstart;
  sx1 = (g_source_location%(LX*LY*LZ)) / (LY*LZ);
  sx2 = (g_source_location%(LY*LZ)) / LZ;
  sx3 = (g_source_location%LZ);
//  g_source_time_slice = sx0;
  fprintf(stdout, "# [] source location %d = (%d,%d,%d,%d)\n", g_source_location, sx0, sx1, sx2, sx3);
  source_timeslice = sx0;


  if(!use_lattice_momenta) {
    status = make_qcont_orbits_3d_parity_avg(&qlatt_id, &qlatt_count, &qlatt_list, &qlatt_nclass, &qlatt_rep, &qlatt_map);
  } else {
    status = make_qlatt_orbits_3d_parity_avg(&qlatt_id, &qlatt_count, &qlatt_list, &qlatt_nclass, &qlatt_rep, &qlatt_map);
  }
  if(status != 0) {
    fprintf(stderr, "\n[] Error while creating h4-lists\n");
    exit(4);
  }
  fprintf(stdout, "# [] number of classes = %d\n", qlatt_nclass);


  /***************************************************************************
   * read the relative momenta q to be used
   ***************************************************************************/
/*
  ofs = fopen(rel_momentum_filename, "r");
  if(ofs == NULL) {
    fprintf(stderr, "[] Error, could not open file %s for reading\n", rel_momentum_filename);
    exit(6);
  }
  rel_momentum_no = 0;
  while( fgets(line, 199, ofs) != NULL) {
    if(line[0] != '#') {
      rel_momentum_no++;
    }
  }
  if(rel_momentum_no == 0) {
    fprintf(stderr, "[] Error, number of momenta is zero\n");
    exit(7);
  } else {
    fprintf(stdout, "# [] number of current momenta = %d\n", rel_momentum_no);
  }
  rewind(ofs);
  rel_momentum_list = (int**)malloc(rel_momentum_no * sizeof(int*));
  rel_momentum_list[0] = (int*)malloc(3*rel_momentum_no * sizeof(int));
  for(i=1;i<rel_momentum_no;i++) { rel_momentum_list[i] = rel_momentum_list[i-1] + 3; }
  count=0;
  while( fgets(line, 199, ofs) != NULL) {
    if(line[0] != '#') {
      sscanf(line, "%d%d%d", rel_momentum_list[count], rel_momentum_list[count]+1, rel_momentum_list[count]+2);
      count++;
    }
  }
  fclose(ofs);
  fprintf(stdout, "# [] current momentum list:\n");
  for(i=0;i<rel_momentum_no;i++) {
    fprintf(stdout, "\t%3d%3d%3d%3d\n", i, rel_momentum_list[i][0], rel_momentum_list[i][1], rel_momentum_list[i][2]);
  }
*/

  /***************************************************************************
   * read the nucleon final momenta to be used
   ***************************************************************************/
  ofs = fopen(snk_momentum_filename, "r");
  if(ofs == NULL) {
    fprintf(stderr, "[] Error, could not open file %s for reading\n", snk_momentum_filename);
    exit(6);
  }
  snk_momentum_no = 0;
  while( fgets(line, 199, ofs) != NULL) {
    if(line[0] != '#') {
      snk_momentum_no++;
    }
  }
  if(snk_momentum_no == 0) {
    fprintf(stderr, "[] Error, number of momenta is zero\n");
    exit(7);
  } else {
    fprintf(stdout, "# [] number of nucleon final momenta = %d\n", snk_momentum_no);
  }
  rewind(ofs);
  snk_momentum_list = (int**)malloc(snk_momentum_no * sizeof(int*));
  snk_momentum_list[0] = (int*)malloc(3*snk_momentum_no * sizeof(int));
  for(i=1;i<snk_momentum_no;i++) { snk_momentum_list[i] = snk_momentum_list[i-1] + 3; }
  count=0;
  while( fgets(line, 199, ofs) != NULL) {
    if(line[0] != '#') {
      sscanf(line, "%d%d%d", snk_momentum_list[count], snk_momentum_list[count]+1, snk_momentum_list[count]+2);
      count++;
    }
  }
  fclose(ofs);
  fprintf(stdout, "# [] the nucleon final momentum list:\n");
  for(i=0;i<snk_momentum_no;i++) {
    fprintf(stdout, "\t%3d%3d%3d%3d\n", i, snk_momentum_list[i][0], snk_momentum_list[i][1], snk_momentum_list[i][1], snk_momentum_list[i][2]);
  }



  /***********************************************************
   * allocate memory for the spinor fields
   ***********************************************************/
  g_spinor_field = NULL;
  if(fermion_type == _TM_FERMION) {
    no_fields = 2*n_s*n_c+3;
  } else {
    no_fields =   n_s*n_c+3;
  }
  if(N_Jacobi>0) no_fields++;

  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  for(i=0; i<no_fields-2; i++) alloc_spinor_field(&g_spinor_field[i], VOL3);
  // work
  if(N_Jacobi>0) work = g_spinor_field[no_fields-4];
  // stochastic_fv
  stochastic_fv = g_spinor_field[no_fields-3];
  // stochastic source and propagator
  alloc_spinor_field(&g_spinor_field[no_fields-2], VOLUME);
  stochastic_source = g_spinor_field[no_fields-2];
  alloc_spinor_field(&g_spinor_field[no_fields-1], VOLUME);
  stochastic_prop   = g_spinor_field[no_fields-1];


  spinor_field_checksum = (DML_Checksum*)malloc(no_fields * sizeof(DML_Checksum) );
  if(spinor_field_checksum == NULL ) {
    fprintf(stderr, "[] Error, could not alloc checksums for spinor fields\n");
    exit(73);
  }
  
  /*************************************************
   * allocate memory for the contractions
   *************************************************/
  items = 4* num_component*T;
  bytes = sizeof(double);
  connt = (double*)malloc(items*bytes);
  if(connt == NULL) {
    fprintf(stderr, "\n[] Error, could not alloc connt\n");
    exit(2);
  }
  for(ix=0; ix<items; ix++) connt[ix] = 0.;

  items = num_component * (size_t)VOL3;
  connq = create_sp_field( items );
  if(connq == NULL) {
    fprintf(stderr, "\n[] Error, could not alloc connq\n");
    exit(2);
  }

  items = (size_t)VOL3;
  stochastic_fp = create_sp_field( items );
  if(stochastic_fp== NULL) {
    fprintf(stderr, "\n[] Error, could not alloc stochastic_fp\n");
    exit(22);
  }

  /******************************************************
   * initialize FFTW
   ******************************************************/
  items = g_fv_dim * (size_t)VOL3;
  bytes = sizeof(fftw_complex);
  in  = (fftw_complex*)malloc( items * bytes );
  if(in == NULL) {
    fprintf(stderr, "[] Error, could not malloc in for FFTW\n");
    exit(155);
  }
  dims[0]=LX; dims[1]=LY; dims[2]=LZ;
  //plan_p = fftwnd_create_plan(3, dims, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE);
  plan_p = fftwnd_create_plan_specific(3, dims, FFTW_FORWARD, FFTW_MEASURE, in, g_fv_dim, (fftw_complex*)( stochastic_fv ), g_fv_dim);

  // create the fermion propagator points
  create_fp(&uprop);
  create_fp(&dprop);
  create_fp(&fp1);
  create_fp(&fp2);
  create_fp(&fp3);
  create_fp(&stochastic_fp);
  create_sp(&sp1);
  create_sp(&sp2);


  // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  // !! implement twisting for _TM_FERMION
  // !!
  // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#ifdef OPENMP
#pragma omp parallel for private(ix) shared(stochastic_prop)
#endif
  for(ix=0;ix<VOLUME;ix++) { _fv_eq_zero(stochastic_prop+_GSI(ix)); }

  for(sid=g_sourceid; sid<=g_sourceid2;sid+=g_sourceid_step) {
    switch(g_soruce_type) {
      case 2:  // timeslice source
        sprintf(filename, "%s.%.4d.%.2d.%.5d.inverted", filename_prefix, Nconf, source_timeslice, sid);
        break;
      default:
        fprintf(stderr, "# [] source type %d not implented; exit\n", g_source_type);
        exit(100);
    }
    fprintf(stdout, "# [] trying to read sample up-prop. from file %s\n", filename);
    read_lime_spinor(stochastic_source, filename, 0);
#ifdef OPENMP
#pragma omp parallel for private(ix) shared(stochastic_prop, stochastic_source)
#endif
    for(ix=0;ix<VOLUME;ix++) { _fv_pl_eq_fv(stochastic_prop+_GSI(ix), stochastic_source+_GSI(ix)); }
  }
#ifdef OPENMP
#pragma omp parallel for private(ix) shared(stochastic_prop, stochastic_source)
#endif
  fnorm = 1. / ( (double)(g_sourceid2 - g_sourceid + 1) * g_prop_normsqr );
  for(ix=0;ix<VOLUME;ix++) { _fv_ti_eq_re(stochastic_prop+_GSI(ix), fnorm); }
  //  calculate the source
  if(fermion_type && g_propagator_bc_type == 1) {
    Q_Wilson_phi(stochastic_source, stochastic_prop);
  } else {
    Q_phi_tbc(stochastic_source, stochastic_prop);
  }

  /******************************************************
   * prepare the stochastic fermion field
   ******************************************************/
  // read timeslice of the gauge field
  if( N_Jacobi>0) {
    switch(g_gauge_file_format) {
      case 0:
        status = read_lime_gauge_field_doubleprec_timeslice(g_gauge_field, gauge_field_filename, source_timeslice, &ildg_gauge_field_checksum);
        break;
      case 1:
        status = read_nersc_gauge_field_timeslice(g_gauge_field, gauge_field_filename, source_timeslice, &nersc_gauge_field_checksum);
        break;
    }
    if(status != 0) {
      fprintf(stderr, "[] Error, could not read gauge field\n");
      exit(21);
    }
    for(i=0; i<N_ape; i++) {
#ifdef OPENMP
      status = APE_Smearing_Step_Timeslice_threads(g_gauge_field, alpha_ape);
#else
      status = APE_Smearing_Step_Timeslice(g_gauge_field, alpha_ape);
#endif
    }
  }
  // read timeslice of the 12 up-type propagators and smear them
  //
  // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  // !! implement twisting for _TM_FERMION
  // !!
  // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  for(is=0;is<n_s*n_c;is++) {
    if(fermion_type != _TM_FERMION) {
      sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix, Nconf, sx0, sx1, sx2, sx3, is);
    } else {
      sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix2, Nconf, sx0, sx1, sx2, sx3, is);
    }
    status = read_lime_spinor_timeslice(g_spinor_field[is], source_timeslice, filename, 0, spinor_field_checksum+is);
    if(status != 0) {
      fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename);
      exit(102);
    }
    if(N_Jacobi > 0) {
      fprintf(stdout, "# [] Jacobi smearing propagator no. %d with paramters N_Jacobi=%d, kappa_Jacobi=%f\n",
          is, N_Jacobi, kappa_Jacobi);
      for(c=0; c<N_Jacobi; c++) {
#ifdef OPENMP
        Jacobi_Smearing_Step_one_Timeslice_threads(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi);
#else
        Jacobi_Smearing_Step_one_Timeslice(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi);
#endif
      }
    }
  }
  for(is=0;is<g_fv_dim;is++) {
    for(ix=0;ix<VOL3;ix++) {
      iix = source_timeslice * VOL3 + ix;
      _fv_eq_gamma_ti_fv(spinor1, 5, g_spinor_field[is]+_GSI(iix));
      _co_eq_fv_dagger_ti_fv(&w, stochastic_source+_GSI(ix), spinor1);
      stochastic_fv[_GSI(ix)+2*is  ] = w.re;
      stochastic_fv[_GSI(ix)+2*is+1] = w.im;
    }
  }
  // Fourier transform
  items = g_fv_dim * (size_t)VOL3;
  bytes = sizeof(double);
  memcpy(in, stochastic_fv, items*bytes );
#ifdef OPENMP
  fftwnd_threads(num_threads, plan_p, g_fv_dim, in, g_fv_dim, 1, (fftw_complex*)(stochastic_fv), g_fv_dim, 1);
#else
  fftwnd(plan_p, g_fv_dim, in, g_fv_dim, 1, (fftw_complex*)(stochastic_fv), g_fv_dim, 1);
#endif


  /******************************************************
   * loop on sink momenta (most likely only one: Q=(0,0,0))
   ******************************************************/
  for(imom_snk=0;imom_snk<snk_momentum_no; imom_snk++) {

    // create Phi_tilde
    _fv_eq_zero( spinor2 );
    for(ix=0;ix<LX;ix++) {
    for(iy=0;iy<LY;iy++) {
    for(iz=0;iz<LZ;iz++) {
      iix = timeslice * VOL3 + ix;
      phase = -2.*M_PI*( (ix-sx1) * snk_momentum_list[imom_snk][0] / (double)LX 
                       + (iy-sx2) * snk_momentum_list[imom_snk][1] / (double)LY 
                       + (iz-sx3) * snk_momentum_list[imom_snk][2] / (double)LZ);
      w.re = cos(phase);
      w.im = sin(phase);
      _fv_eq_fv_ti_co(spinor1, stochastic_prop + _GSI(iix), &w);
      _fv_pl_eq_fv(spinor2, spinor);
    }}}
    // create Theta
    for(ir=0;ir<g_fv_dim;ir++) {
    for(is=0;is<g_fv_dim;is++) {
      _co_eq_co_ti_co( &(stochastic_fp[ix][ir][2*is]), &(spinor2[2*ir]), &(stochastic_fv[_GSI(ix)+2*is]) );
    }}

    /******************************************************
     * loop on timeslices
     ******************************************************/
    for(timeslice=0; timeslice<T; timeslice++) {
      append = (int)( timeslice != 0 );

      // read timeslice of the gauge field
      if( N_Jacobi>0) {
        switch(g_gauge_file_format) {
          case 0:
            status = read_lime_gauge_field_doubleprec_timeslice(g_gauge_field, gauge_field_filename, timeslice, &ildg_gauge_field_checksum);
            break;
          case 1:
            status = read_nersc_gauge_field_timeslice(g_gauge_field, gauge_field_filename, timeslice, &nersc_gauge_field_checksum);
            break;
        }
        if(status != 0) {
          fprintf(stderr, "[] Error, could not read gauge field\n");
          exit(21);
        }

        for(i=0; i<N_ape; i++) {
#ifdef OPENMP
          status = APE_Smearing_Step_Timeslice_threads(g_gauge_field, alpha_ape);
#else
          status = APE_Smearing_Step_Timeslice(g_gauge_field, alpha_ape);
#endif
        }

      }

      // read timeslice of the 12 up-type propagators and smear them
      for(is=0;is<n_s*n_c;is++) {
          sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix, Nconf, sx0, sx1, sx2, sx3, is);
          status = read_lime_spinor_timeslice(g_spinor_field[is], timeslice, filename, 0, spinor_field_checksum+is);
          if(status != 0) {
            fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename);
            exit(102);
          }
          if(N_Jacobi > 0) {
            fprintf(stdout, "# [] Jacobi smearing propagator no. %d with paramters N_Jacobi=%d, kappa_Jacobi=%f\n",
                is, N_Jacobi, kappa_Jacobi);
            for(c=0; c<N_Jacobi; c++) {
#ifdef OPENMP
              Jacobi_Smearing_Step_one_Timeslice_threads(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi);
#else
              Jacobi_Smearing_Step_one_Timeslice(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi);
#endif
            }
          }
      }

      if(fermion_type == _TM_FERMION) {
        // read timeslice of the 12 down-type propagators, smear them
        for(is=0;is<n_s*n_c;is++) {
          if(do_gt == 0) {
            sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix2, Nconf, sx0, sx1, sx2, sx3, is);
            status = read_lime_spinor_timeslice(g_spinor_field[n_s*n_c+is], timeslice, filename, 0, spinor_field_checksum+n_s*n_c+is);
            if(status != 0) {
              fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename);
              exit(102);
            }
            if(N_Jacobi > 0) {
              fprintf(stdout, "# [] Jacobi smearing propagator no. %d with paramters N_Jacobi=%d, kappa_Jacobi=%f\n",
                   is, N_Jacobi, kappa_Jacobi);
              for(c=0; c<N_Jacobi; c++) {
#ifdef OPENMP
                Jacobi_Smearing_Step_one_Timeslice_threads(g_gauge_field, g_spinor_field[n_s*n_c+is], work, kappa_Jacobi);
#else
                Jacobi_Smearing_Step_one_Timeslice(g_gauge_field, g_spinor_field[n_s*n_c+is], work, kappa_Jacobi);
#endif
              }
            }
        }
      }

  
      /******************************************************
       * contractions
       ******************************************************/
      for(ix=0;ix<VOL3;ix++) 
      //for(ix=0;ix<1;ix++) 
      {
  
        // assign the propagators
        _assign_fp_point_from_field(uprop, g_spinor_field, ix);
        if(fermion_type==_TM_FERMION) {
          _assign_fp_point_from_field(dprop, g_spinor_field+n_s*n_c, ix);
        } else {
          _fp_eq_fp(dprop, uprop);
        }
        flavor rotation for twisted mass fermions
        if(fermion_type == _TM_FERMION) {
          _fp_eq_rot_ti_fp(fp1, uprop, +1, fermion_type, fp2);
          _fp_eq_fp_ti_rot(uprop, fp1, +1, fermion_type, fp2);
  //        _fp_eq_rot_ti_fp(fp1, dprop, -1, fermion_type, fp2);
  //        _fp_eq_fp_ti_rot(dprop, fp1, -1, fermion_type, fp2);
        }
  
        // test: print fermion propagator point
        //printf_fp(uprop, stdout);
  
  
        for(icomp=0; icomp<num_component; icomp++) {
  
          _sp_eq_zero( connq[ix*num_component+icomp]);
  
          /******************************************************
           * first contribution
           ******************************************************/
          _fp_eq_zero(fp1);
          _fp_eq_zero(fp2);
          _fp_eq_zero(fp3);
          // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u
          _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop);
          _fp_eq_gamma_ti_fp(fp3, 2, fp1);
          _fp_eq_gamma_ti_fp(fp1, 0, fp3);
  
          // S_u x C Gamma_2 = S_u x g0 g2 Gamma_2
          _fp_eq_fp_ti_gamma(fp2, 0, uprop);
          _fp_eq_fp_ti_gamma(fp3, 2, fp2);
          _fp_eq_fp_ti_gamma(fp2, gamma_component[1][icomp], fp3);
    
          // first part
          // reduce
          _fp_eq_zero(fp3);
          _fp_eq_fp_eps_contract13_fp(fp3, fp1, uprop);
          // reduce to spin propagator
          _sp_eq_zero( sp1 );
          _sp_eq_fp_del_contract23_fp(sp1, fp2, fp3);
          // second part
          // reduce to spin propagator
          _sp_eq_zero( sp2 );
          _sp_eq_fp_del_contract24_fp(sp2, fp2, fp3);
          // add and assign
          _sp_pl_eq_sp(sp1, sp2);
          _sp_eq_sp_ti_re(sp2, sp1, -gamma_component_sign[icomp]);
          _sp_eq_sp( connq[ix*num_component+icomp], sp2);
  
          /******************************************************
           * second contribution
           ******************************************************/
          _fp_eq_zero(fp1);
          _fp_eq_zero(fp2);
          _fp_eq_zero(fp3);
          // first part
          // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u 
          _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop);
          _fp_eq_gamma_ti_fp(fp3, 2, fp1);
          _fp_eq_gamma_ti_fp(fp1, 0, fp3);
          // S_u x C Gamma_2 = S_u g0 g2 Gamma_2 (same S_u as above)
          _fp_eq_fp_ti_gamma(fp2, 0, fp1);
          _fp_eq_fp_ti_gamma(fp3, 2, fp2);
          _fp_eq_fp_ti_gamma(fp1, gamma_component[1][icomp], fp3);
          // reduce
          _fp_eq_zero(fp3);
          _fp_eq_fp_eps_contract13_fp(fp3, fp1, uprop);
          // reduce to spin propagator
          _sp_eq_zero( sp1 );
          _sp_eq_fp_del_contract23_fp(sp1, uprop, fp3);
          // second part
          // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u
          _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop);
          _fp_eq_gamma_ti_fp(fp3, 2, fp1);
          _fp_eq_gamma_ti_fp(fp1, 0, fp3);
          // S_u x C Gamma_2 = S_u g0 g2 Gamma_2
          _fp_eq_fp_ti_gamma(fp2, 0, uprop);
          _fp_eq_fp_ti_gamma(fp3, 2, fp2);
          _fp_eq_fp_ti_gamma(fp2, gamma_component[1][icomp], fp3);
          // reduce
          _fp_eq_zero(fp3);
          _fp_eq_fp_eps_contract13_fp(fp3, fp1, fp2);
          // reduce to spin propagator
          _sp_eq_zero( sp2 );
          _sp_eq_fp_del_contract24_fp(sp2, uprop, fp3);
          // add and assign
          _sp_pl_eq_sp(sp1, sp2);
          _sp_eq_sp_ti_re(sp2, sp1, -gamma_component_sign[icomp]);
          _sp_pl_eq_sp( connq[ix*num_component+icomp], sp2);
  
          /******************************************************
           * third contribution
           ******************************************************/
          _fp_eq_zero(fp1);
          _fp_eq_zero(fp2);
          _fp_eq_zero(fp3);
          // first part
          // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u
          _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop);
          _fp_eq_gamma_ti_fp(fp3, 2, fp1);
          _fp_eq_gamma_ti_fp(fp1, 0, fp3);
          // S_u x C Gamma_2 = S_u g0 g2 Gamma_2
          _fp_eq_fp_ti_gamma(fp2, 0, fp1);
          _fp_eq_fp_ti_gamma(fp3, 2, fp2);
          _fp_eq_fp_ti_gamma(fp1, gamma_component[1][icomp], fp3);
          // reduce
          _fp_eq_zero(fp3);
          _fp_eq_fp_eps_contract13_fp(fp3, fp1, uprop);
          // reduce to spin propagator
          _sp_eq_zero( sp1 );
          _sp_eq_fp_del_contract34_fp(sp1, uprop, fp3);
          // second part
          // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u
          _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop);
          _fp_eq_gamma_ti_fp(fp3, 2, fp1);
          _fp_eq_gamma_ti_fp(fp1, 0, fp3);
          // S_u x C Gamma_2 = S_u g0 g2 Gamma_2
          _fp_eq_fp_ti_gamma(fp2, 0, uprop);
          _fp_eq_fp_ti_gamma(fp3, 2, fp2);
          _fp_eq_fp_ti_gamma(fp2, gamma_component[1][icomp], fp3);
          // reduce
          _fp_eq_zero(fp3);
          _fp_eq_fp_eps_contract13_fp(fp3, fp1, fp2);
          // reduce to spin propagator
          _sp_eq_zero( sp2 );
          _sp_eq_fp_del_contract34_fp(sp2, uprop, fp3);
          // add and assign
          _sp_pl_eq_sp(sp1, sp2);
          _sp_eq_sp_ti_re(sp2, sp1, -gamma_component_sign[icomp]);
          _sp_pl_eq_sp( connq[ix*num_component+icomp], sp2);
  
        }  // of icomp
  
      }    // of ix
  
      /***********************************************
       * finish calculation of connq
       ***********************************************/
      if(g_propagator_bc_type == 0) {
        // multiply with phase factor
        fprintf(stdout, "# [] multiplying timeslice %d with boundary phase factor\n", timeslice);
        ir = (timeslice - sx0 + T_global) % T_global;
        w1.re = cos( 3. * M_PI*(double)ir / (double)T_global );
        w1.im = sin( 3. * M_PI*(double)ir / (double)T_global );
        for(ix=0;ix<num_component*VOL3;ix++) {
          _sp_eq_sp(sp1, connq[ix] );
          _sp_eq_sp_ti_co( connq[ix], sp1, w1);
        }
      } else if (g_propagator_bc_type == 1) {
        // multiply with step function
        if(timeslice < sx0) {
          fprintf(stdout, "# [] multiplying timeslice %d with boundary step function\n", timeslice);
          for(ix=0;ix<num_component*VOL3;ix++) {
            _sp_eq_sp(sp1, connq[ix] );
            _sp_eq_sp_ti_re( connq[ix], sp1, -1.);
          }
        }
      }
    
      if(write_ascii) {
        sprintf(filename, "%s_x.%.4d.t%.2dx%.2dy%.2dz%.2d.ascii", outfile_prefix, Nconf, sx0, sx1, sx2, sx3);
        write_contraction2( connq[0][0], filename, num_component*g_sv_dim*g_sv_dim, VOL3, 1, append);
      }
  
      /******************************************************************
       * Fourier transform
       ******************************************************************/
      items =  2 * num_component * g_sv_dim * g_sv_dim * VOL3;
      bytes = sizeof(double);
  
      memcpy(in, connq[0][0], items * bytes);
      ir = num_component * g_sv_dim * g_sv_dim;
  #ifdef OPENMP
      fftwnd_threads(num_threads, plan_p, ir, in, ir, 1, (fftw_complex*)(connq[0][0]), ir, 1);
  #else
      fftwnd(plan_p, ir, in, ir, 1, (fftw_complex*)(connq[0][0]), ir, 1);
  #endif
  
      // add phase factor from the source location
      iix = 0;
      for(x1=0;x1<LX;x1++) {
        q[0] = (double)x1 / (double)LX;
      for(x2=0;x2<LY;x2++) {
        q[1] = (double)x2 / (double)LY;
      for(x3=0;x3<LZ;x3++) {
        q[2] = (double)x3 / (double)LZ;
        phase = 2. * M_PI * ( q[0]*sx1 + q[1]*sx2 + q[2]*sx3 );
        w1.re = cos(phase);
        w1.im = sin(phase);
  
        for(icomp=0; icomp<num_component; icomp++) {
          _sp_eq_sp(sp1, connq[iix] );
          _sp_eq_sp_ti_co( connq[iix], sp1, w1) ;
          iix++; 
        }
      }}}  // of x3, x2, x1
  
      // write to file
      sprintf(filename, "%s_q.%.4d.t%.2dx%.2dy%.2dz%.2d.Qx%.2dQy%.2dQz%.2d.%.5d", outfile_prefix, Nconf, sx0, sx1, sx2, sx3,
         qlatt_rep[snk_momentum_list[imom_snk]][1],qlatt_rep[snk_momentum_list[imom_snk]][2],qlatt_rep[snk_momentum_list[imom_snk]][3],
         g_sourceid2-g_sourceid+1);
      sprintf(contype, "2-pt. function, (t,q_1,q_2,q_3)-dependent, source_timeslice = %d", sx0);
      write_lime_contraction_timeslice(connq[0][0], filename, 64, num_component*g_sv_dim*g_sv_dim, contype, Nconf, 0, &connq_checksum, timeslice);
  
      if(write_ascii) {
        strcat(filename, ".ascii");
        write_contraction2(connq[0][0],filename, num_component*g_sv_dim*g_sv_dim, VOL3, 1, append);
      }
  
  
      /***********************************************
       * calculate connt
       ***********************************************/
      for(icomp=0;icomp<num_component; icomp++) {
        // fwd
        _sp_eq_sp(sp1, connq[icomp]);
        _sp_eq_gamma_ti_sp(sp2, 0, sp1);
        _sp_pl_eq_sp(sp1, sp2);
        _co_eq_tr_sp(&w, sp1);
        connt[2*(icomp*T + timeslice)  ] = w.re * 0.25;
        connt[2*(icomp*T + timeslice)+1] = w.im * 0.25;
        // bwd
        _sp_eq_sp(sp1, connq[icomp]);
        _sp_eq_gamma_ti_sp(sp2, 0, sp1);
        _sp_mi_eq_sp(sp1, sp2);
        _co_eq_tr_sp(&w, sp1);
        connt[2*(icomp*T+timeslice + num_component*T)  ] = w.re * 0.25;
        connt[2*(icomp*T+timeslice + num_component*T)+1] = w.im * 0.25;
      }
  
    }  // of loop on timeslice

    // write connt
    sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.fw", outfile_prefix, Nconf, sx0, sx1, sx2, sx3);
    ofs = fopen(filename, "w");
    if(ofs == NULL) {
      fprintf(stderr, "[] Error, could not open file %s for writing\n", filename);
      exit(3);
    }
    fprintf(ofs, "#%12.8f%3d%3d%3d%3d%8.4f%6d\n", g_kappa, T_global, LX, LY, LZ, g_mu, Nconf);
  
    for(icomp=0; icomp<num_component; icomp++) {
      ir = sx0;
      fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], 0, connt[2*(icomp*T+ir)], 0., Nconf);
      for(it=1;it<T/2;it++) {
        ir  = ( it + sx0 ) % T_global;
        ir2 = ( (T_global - it) + sx0 ) % T_global;
        fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(icomp*T+ir)], connt[2*(icomp*T+ir2)], Nconf);
      }
      ir = ( it + sx0 ) % T_global;
      fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(icomp*T+ir)], 0., Nconf);
    }
    fclose(ofs);
  
    sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.bw", outfile_prefix, Nconf, sx0, sx1, sx2, sx3);
    ofs = fopen(filename, "w");
    if(ofs == NULL) {
      fprintf(stderr, "[] Error, could not open file %s for writing\n", filename);
      exit(3);
    }
    fprintf(ofs, "#%12.8f%3d%3d%3d%3d%8.4f%6d\n", g_kappa, T_global, LX, LY, LZ, g_mu, Nconf);
  
    for(icomp=0; icomp<num_component; icomp++) {
      ir = sx0;
      fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], 0, connt[2*(num_component*T+icomp*T+ir)], 0., Nconf);
      for(it=1;it<T/2;it++) {
        ir  = ( it + sx0 ) % T_global;
        ir2 = ( (T_global - it) + sx0 ) % T_global;
        fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(num_component*T+icomp*T+ir)], connt[2*(num_component*T+icomp*T+ir2)], Nconf);
      }
      ir = ( it + sx0 ) % T_global;
      fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(num_component*T+icomp*T+ir)], 0., Nconf);
    }
    fclose(ofs);

  }  // of loop on sink momentum ( = Delta^++ momentum, Qvec)

  /***********************************************
   * free the allocated memory, finalize
   ***********************************************/
  free_geometry();
  if(connt!= NULL) free(connt);
  if(connq!= NULL) free(connq);
  if(gauge_trafo != NULL) free(gauge_trafo);

  if(g_spinor_field!=NULL) {
    for(i=0; i<no_fields; i++) free(g_spinor_field[i]);
    free(g_spinor_field); g_spinor_field=(double**)NULL;
  }
  if(spinor_field_checksum !=NULL) free(spinor_field_checksum);
  if(g_gauge_field != NULL) free(g_gauge_field);

  if(snk_momemtum_list != NULL) {
    if(snk_momentum_list[0] != NULL) free(snk_momentum_list[0]);
    free(snk_momentum_list);
  }
  if(rel_momemtum_list != NULL) {
    if(rel_momentum_list[0] != NULL) free(rel_momentum_list[0]);
    free(rel_momentum_list);
  }

  // free the fermion propagator points
  free_fp( &uprop );
  free_fp( &dprop );
  free_fp( &fp1 );
  free_fp( &fp2 );
  free_fp( &fp3 );
  free_sp( &sp1 );
  free_sp( &sp2 );

  free(in);
  fftwnd_destroy_plan(plan_p);

  g_the_time = time(NULL);
  fprintf(stdout, "# [] %s# [] end fo run\n", ctime(&g_the_time));
  fflush(stdout);
  fprintf(stderr, "# [] %s# [] end fo run\n", ctime(&g_the_time));
  fflush(stderr);

#ifdef MPI
  MPI_Finalize();
#endif
  return(0);
}
Example #26
0
void test_speed_nd_aux(struct size sz,
		       fftw_direction dir, int flags, int specific)
{
     int local_nx, local_x_start, local_ny_after_transpose,
	  local_y_start_after_transpose, total_local_size;
     fftw_complex *in, *work;
     fftwnd_plan plan = 0;
     fftwnd_mpi_plan mpi_plan;
     double t, t0 = 0.0;
     int i, N;
     
     if (sz.rank < 2)
	  return;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;	

     N = 1;
     for (i = 0; i < sz.rank; ++i)
	  N *= (sz.narray[i]);

     if (specific) {
	  return;
     } else {
	  if (io_okay && !only_parallel)
	       plan = fftwnd_create_plan(sz.rank, sz.narray,
					 dir, speed_flag | flags
					 | wisdom_flag | no_vector_flag);
	  mpi_plan = fftwnd_mpi_create_plan(MPI_COMM_WORLD, sz.rank, sz.narray,
					    dir, speed_flag | flags
					    | wisdom_flag | no_vector_flag);
     }
     CHECK(mpi_plan != NULL, "can't create plan");

     fftwnd_mpi_local_sizes(mpi_plan, &local_nx, &local_x_start,
			    &local_ny_after_transpose,
			    &local_y_start_after_transpose,
			    &total_local_size);

     if (io_okay && !only_parallel)
	  in = (fftw_complex *) fftw_malloc(N * howmany_fields *
					    sizeof(fftw_complex));
     else
	  in = (fftw_complex *) fftw_malloc(total_local_size * howmany_fields *
					    sizeof(fftw_complex));
     work = (fftw_complex *) fftw_malloc(total_local_size * howmany_fields *
					 sizeof(fftw_complex));
     
     if (io_okay && !only_parallel) {
	  FFTW_TIME_FFT(fftwnd(plan, howmany_fields,
			      in, howmany_fields, 1, 0, 0, 0),
		       in, N * howmany_fields, t0);

	  fftwnd_destroy_plan(plan);
	  
	  WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n",
				 smart_sprint_time(t0)));
     }

     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,
			     in, NULL, FFTW_NORMAL_ORDER),
		   in, total_local_size * howmany_fields, t);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("NORMAL: time for one fft (%d cpus): %s",
				 ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
	  WHEN_VERBOSE(1, printf("NORMAL: \"mflops\" = 5 (N log2 N) / "
				 "(t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t, N)));
	  if (!only_parallel)
	     WHEN_VERBOSE(1, printf("NORMAL: parallel speedup: %f\n", t0 / t));
     }

     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,
			     in, NULL, FFTW_TRANSPOSED_ORDER),
		   in, total_local_size * howmany_fields, t);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("TRANSP.: time for one fft (%d cpus): %s",
				 ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
	  WHEN_VERBOSE(1, printf("TRANSP.: \"mflops\" = 5 (N log2 N) / "
				 "(t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t, N)));
	  if (!only_parallel)
	    WHEN_VERBOSE(1, printf("TRANSP.: parallel speedup: %f\n", t0 / t));
     }

     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,
			     in, work, FFTW_NORMAL_ORDER),
		   in, total_local_size * howmany_fields, t);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("NORMAL,w/WORK: time for one fft (%d cpus): %s",
				 ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
	  WHEN_VERBOSE(1, printf("NORMAL,w/WORK: \"mflops\" = 5 (N log2 N) / "
				 "(t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t, N)));
	  if (!only_parallel)
	       WHEN_VERBOSE(1, printf("NORMAL,w/WORK: parallel speedup: %f\n", t0 / t));
     }

     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,
			     in, work, FFTW_TRANSPOSED_ORDER),
		   in, total_local_size * howmany_fields, t);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: time for one fft (%d cpus): %s",
				 ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
	  WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: \"mflops\" = 5 (N log2 N) / "
				 "(t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t, N)));
	  if (!only_parallel)
	       WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: parallel speedup: %f\n", t0 / t));
     }

     fftwnd_mpi_destroy_plan(mpi_plan);

     fftw_free(in);
     fftw_free(work);

     WHEN_VERBOSE(1, my_printf("\n"));
}
Example #27
0
/* Call fftw for a 1 band complex image.
 */
static int 
cfwfft1( IMAGE *dummy, IMAGE *in, IMAGE *out )
{
	fftwnd_plan plan;
	double *buf, *q, *p;
	int x, y;

	IMAGE *cmplx = im_open_local( dummy, "fwfft1:1", "t" );

	/* Make dp complex image.
	 */
	if( !cmplx || im_pincheck( in ) || im_outcheck( out ) )
		return( -1 );
	if( in->Coding != IM_CODING_NONE || in->Bands != 1 ) {
                im_error( "im_fwfft", _( "one band uncoded only" ) );
                return( -1 );
	}
	if( im_clip2dcm( in, cmplx ) )
                return( -1 );

	/* Make the plan for the transform.
	 */
	if( !(plan = fftw2d_create_plan( in->Ysize, in->Xsize,
		FFTW_FORWARD, 
		FFTW_MEASURE | FFTW_USE_WISDOM | FFTW_IN_PLACE )) ) {
                im_error( "im_fwfft", _( "unable to create transform plan" ) );
		return( -1 );
	}

	fftwnd_one( plan, (fftw_complex *) cmplx->data, NULL );

	fftwnd_destroy_plan( plan );

	/* WIO to out.
	 */
        if( im_cp_desc( out, in ) )
                return( -1 );
	out->Bbits = IM_BBITS_DPCOMPLEX;
	out->BandFmt = IM_BANDFMT_DPCOMPLEX;
        if( im_setupout( out ) )
                return( -1 );
	if( !(buf = (double *) IM_ARRAY( dummy, 
		IM_IMAGE_SIZEOF_LINE( out ), PEL )) )
		return( -1 );

	/* Copy to out, normalise.
	 */
	for( p = (double *) cmplx->data, y = 0; y < out->Ysize; y++ ) {
		int size = out->Xsize * out->Ysize;

		q = buf;

		for( x = 0; x < out->Xsize; x++ ) {
			q[0] = p[0] / size;
			q[1] = p[1] / size;
			p += 2;
			q += 2;
		}

		if( im_writeline( y, out, (PEL *) buf ) )
			return( -1 );
	}

	return( 0 );
}
Example #28
0
int main(int argc, char **argv) {
  
  int c, i, mu, nu;
  int filename_set = 0;
  int dims[4]      = {0,0,0,0};
  int l_LX_at, l_LXstart_at;
  int x0, x1, x2, x3, ix, iix;
  int xx0, xx1, xx2, xx3;
  int y0min, y0max, y1min, y1max, y2min, y2max, y3min, y3max;
  int y0, y1, y2, y3, iy;
  int z0, z1, z2, z3, iz;
  int gid, status;
  int model_type = -1;
  double *disc  = (double*)NULL;
  double *disc2 = (double*)NULL;
  double *work = (double*)NULL;
  double q[4], fnorm;
  char filename[100], contype[200];
  double ratime, retime;
  double rmin2, rmax2, rsqr;
  complex w, w1;
  FILE *ofs;

  fftw_complex *in=(fftw_complex*)NULL;

#ifdef MPI
  fftwnd_mpi_plan plan_p, plan_m;
#else
  fftwnd_plan plan_p, plan_m;
#endif

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "h?f:t:")) != -1) {
    switch (c) {
    case 't':
      model_type = atoi(optarg);
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }

  fprintf(stdout, "\n**************************************************\n");
  fprintf(stdout, "* vp_disc_ft\n");
  fprintf(stdout, "**************************************************\n\n");
#ifdef MPI
  if(g_cart_id==0) fprintf(stdout, "# Warning: MPI-version not yet available; exit\n");
  exit(200);
#endif


  /*********************************
   * initialize MPI parameters 
   *********************************/
  mpi_init(argc, argv);

  /* initialize fftw */
  dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ;
#ifdef MPI
  plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE);
  plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE);
  fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME);
#else
  plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE);
  plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD,  FFTW_MEASURE | FFTW_IN_PLACE);
  T            = T_global;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
#endif
  fprintf(stdout, "# [%2d] fftw parameters:\n"\
                  "# [%2d] T            = %3d\n"\
		  "# [%2d] Tstart       = %3d\n"\
		  "# [%2d] l_LX_at      = %3d\n"\
		  "# [%2d] l_LXstart_at = %3d\n"\
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

#ifdef MPI
  if(T==0) {
    fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id);
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
    exit(2);
  }
#endif

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(1);
  }

  geometry();

  /****************************************
   * allocate memory for the contractions
   ****************************************/
  disc  = (double*)calloc( 8*VOLUME, sizeof(double));
  if( disc == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for disc\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }

  disc2 = (double*)calloc( 32*VOLUME, sizeof(double));
  if( disc2 == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for disc2\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }
  for(ix=0; ix<32*VOLUME; ix++) disc2[ix] = 0.;


  work  = (double*)calloc(32*VOLUME, sizeof(double));
  if( work == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for work\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }

  /****************************************
   * prepare Fourier transformation arrays
   ****************************************/
  in  = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex));
  if(in==(fftw_complex*)NULL) {    
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(4);
  }

  /***************************************
   * set model type function
   ***************************************/
  switch (model_type) {
    case 0:
      model_type_function = pidisc_model;
      fprintf(stdout, "# function pointer set to type pidisc_model\n");
    case 1:
      model_type_function = pidisc_model1;
      fprintf(stdout, "# function pointer set to type pidisc_model1\n");
      break;
    case 2:
      model_type_function = pidisc_model2;
      fprintf(stdout, "# function pointer set to type pidisc_model2\n");
      break;
    case 3:
      model_type_function = pidisc_model3;
      fprintf(stdout, "# function pointer set to type pidisc_model3\n");
      break;
    default:
      model_type_function = NULL;
      fprintf(stdout, "# no model function selected; will add zero\n");
      break;
  }

  /****************************************
   * prepare the model for pidisc
   * - same for all gauge configurations
   ****************************************/
  rmin2 = g_rmin * g_rmin;
  rmax2 = g_rmax * g_rmax;
  if(model_type > -1) {
    for(mu=0; mu<16; mu++) {
      model_type_function(model_mrho, model_dcoeff_re, model_dcoeff_im, work, plan_m, mu);
      for(x0=-(T-1);  x0<T;  x0++) {
        y0 = (x0 + T_global) % T_global;
      for(x1=-(LX-1); x1<LX; x1++) {
        y1 = (x1 + LX) % LX;
      for(x2=-(LY-1); x2<LY; x2++) {
        y2 = (x2 + LY) % LY;
      for(x3=-(LZ-1); x3<LZ; x3++) {
        y3 = (x3 + LZ) % LZ;
        iy = g_ipt[y0][y1][y2][y3];
        rsqr = (double)(x1*x1) + (double)(x2*x2) + (double)(x3*x3);
        if(rmin2-rsqr<=_Q2EPS && rsqr-rmax2<=_Q2EPS) continue; /* radius in range for data usage, so continue */
        disc2[_GWI(mu,iy,VOLUME)  ] += work[2*iy  ];
        disc2[_GWI(mu,iy,VOLUME)+1] += work[2*iy+1];
      }}}}
      memcpy((void*)in, (void*)(disc2+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_p, in, NULL);
#endif
      memcpy((void*)(disc2+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));
    }
  } else {
    for(ix=0; ix<32*VOLUME; ix++) disc2[ix] = 0.; 
  }
  
  /***********************************************
   * start loop on gauge id.s 
   ***********************************************/
  for(gid=g_gaugeid; gid<=g_gaugeid2; gid+=g_gauge_step) {

    if(g_cart_id==0) fprintf(stdout, "# Start working on gauge id %d\n", gid);

    /* read the new contractions */
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    sprintf(filename, "%s.%.4d.%.4d", filename_prefix, gid, Nsave);
    if(g_cart_id==0) fprintf(stdout, "# Reading contraction data from file %s\n", filename);
    if(read_lime_contraction(disc, filename, 4, 0) == 106) {
      if(g_cart_id==0) fprintf(stderr, "Error, could not read from file %s, continue\n", filename);
      continue;
    }
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "# time to read contraction: %e seconds\n", retime-ratime);

    /************************************************
     * prepare \Pi_\mu\nu (x,y)
     ************************************************/
#  ifdef MPI
    ratime = MPI_Wtime();
#  else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#  endif
    for(x0=-T+1; x0<T; x0++) {
      y0min = x0<0 ? -x0 : 0;
      y0max = x0<0 ? T   : T-x0;
    for(x1=-LX+1; x1<LX; x1++) {
      y1min = x1<0 ? -x1 : 0;
      y1max = x1<0 ? LX  : LX-x1;
    for(x2=-LY+1; x2<LY; x2++) {
      y2min = x2<0 ? -x2 : 0;
      y2max = x2<0 ? LY  : LY-x2;
    for(x3=-LZ+1; x3<LZ; x3++) {
      y3min = x3<0 ? -x3 : 0;
      y3max = x3<0 ? LZ  : LZ-x3;
      xx0 = (x0+T ) % T;
      xx1 = (x1+LX) % LX;
      xx2 = (x2+LX) % LY;
      xx3 = (x3+LX) % LZ;
      ix = g_ipt[xx0][xx1][xx2][xx3];

      rsqr = (double)(x1*x1) + (double)(x2*x2) + (double)(x3*x3);
      if(rmin2-rsqr>_Q2EPS || rsqr-rmax2>_Q2EPS) continue;
      
      for(y0=y0min; y0<y0max; y0++) {
        z0 = y0 + x0;
      for(y1=y1min; y1<y1max; y1++) {
        z1 = y1 + x1;
      for(y2=y2min; y2<y2max; y2++) {
        z2 = y2 + x2;
      for(y3=y3min; y3<y3max; y3++) {
        z3 = y3 + x3;
        iy = g_ipt[y0][y1][y2][y3];
        iz = g_ipt[z0][z1][z2][z3];

        i=0;
        for(mu=0; mu<4; mu++) {
        for(nu=0; nu<4; nu++) {
          iix = _GWI(i,ix,VOLUME);
          _co_eq_co_ti_co(&w, (complex*)(disc+_GWI(mu,iz,VOLUME)), (complex*)(disc+_GWI(nu,iy,VOLUME)));
          work[iix  ] += w.re;
          work[iix+1] += w.im;
          i++;
        }}
      }}}}
    }}}}
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "# time to calculate \\Pi_\\mu\\nu in position space: %e seconds\n", retime-ratime);

    /***********************************************
     * Fourier transform
     ***********************************************/
    for(mu=0; mu<16; mu++) {
      memcpy((void*)in, (void*)(work+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_p, in, NULL);
#endif      
      memcpy((void*)(work+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));
    }


    fnorm = 1. / ((double)T_global * (double)(LX*LY*LZ));
    if(g_cart_id==0) fprintf(stdout, "# P-fnorm = %16.5e\n", fnorm);
    for(x0=0; x0<T; x0++) {
      q[0] = (double)(x0+Tstart) / (double)T_global;
    for(x1=0; x1<LX; x1++) {
      q[1] = (double)x1 / (double)LX;
    for(x2=0; x2<LY; x2++) {
      q[2] = (double)x2 / (double)LY;
    for(x3=0; x3<LZ; x3++) {
      q[3] = (double)x3 / (double)LZ;
      ix = g_ipt[x0][x1][x2][x3];
      i=0;
      for(mu=0; mu<4; mu++) {
      for(nu=0; nu<4; nu++) {
        iix = _GWI(i,ix,VOLUME);
        w.re = cos(M_PI * (q[mu] - q[nu]));
        w.im = sin(M_PI * (q[mu] - q[nu]));
        work[iix  ] = work[iix  ] * fnorm + disc2[iix  ];
        work[iix+1] = work[iix+1] * fnorm + disc2[iix+1];
        _co_eq_co_ti_co(&w1, (complex*)(work+iix), &w);
        work[iix  ] = w1.re;
        work[iix+1] = w1.im;
        i++;
      }}
    }}}}

    /***********************************************
     * save results
     ***********************************************/
    sprintf(filename, "%s.%.4d.%.4d", filename_prefix2, gid, Nsave);
    if(g_cart_id==0) fprintf(stdout, "# Saving results to file %s\n", filename);
    sprintf(contype, "cvc-disc-P");
    write_lime_contraction(work, filename, 64, 16, contype, gid, Nsave);

/*
    sprintf(filename, "%sascii.%.4d.%.4d", filename_prefix2, gid, Nsave);
    write_contraction(work, NULL, filename, 16, 2, 0);
*/

    if(g_cart_id==0) fprintf(stdout, "# Finished working on gauge id %d\n", gid);
  }  /* of loop on gid */

  /***********************************************
   * free the allocated memory, finalize 
   ***********************************************/
  free_geometry();
  fftw_free(in);
  free(disc);
  free(disc2);
  free(work);

#ifdef MPI
  fftwnd_mpi_destroy_plan(plan_p);
  fftwnd_mpi_destroy_plan(plan_m);
  MPI_Finalize();
#else
  fftwnd_destroy_plan(plan_p);
  fftwnd_destroy_plan(plan_m);
#endif

  return(0);

}
Example #29
0
int main(int argc, char **argv) {
  
  const int n_c=3;
  const int n_s=4;
  const char outfile_prefix[] = "delta_pp_2pt_v4";

  int c, i, icomp;
  int filename_set = 0;
  int append, status;
  int l_LX_at, l_LXstart_at;
  int ix, it, iix, x1,x2,x3;
  int ir, ir2, is;
  int VOL3;
  int do_gt=0;
  int dims[3];
  double *connt=NULL;
  spinor_propagator_type *connq=NULL;
  int verbose = 0;
  int sx0, sx1, sx2, sx3;
  int write_ascii=0;
  int fermion_type = 1;  // Wilson fermion type
  int pos;
  char filename[200], contype[200], gauge_field_filename[200];
  double ratime, retime;
  //double plaq_m, plaq_r;
  double *work=NULL;
  fermion_propagator_type *fp1=NULL, *fp2=NULL, *fp3=NULL, *uprop=NULL, *dprop=NULL, *fpaux=NULL;
  spinor_propagator_type *sp1=NULL, *sp2=NULL;
  double q[3], phase, *gauge_trafo=NULL;
  complex w, w1;
  size_t items, bytes;
  FILE *ofs;
  int timeslice;
  DML_Checksum ildg_gauge_field_checksum, *spinor_field_checksum=NULL, connq_checksum;
  uint32_t nersc_gauge_field_checksum;
  int threadid, nthreads;

/*******************************************************************
 * Gamma components for the Delta:
 *                                                                 */
  const int num_component = 4;
  int gamma_component[2][4] = { {0, 1, 2, 3},
                                {0, 1, 2, 3} };
  double gamma_component_sign[4] = {+1.,+1.,-1.,+1.};
/*
 *******************************************************************/
  fftw_complex *in=NULL;
#ifdef MPI
   fftwnd_mpi_plan plan_p;
#else
   fftwnd_plan plan_p;
#endif 

#ifdef MPI
  MPI_Status status;
#endif

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "ah?vgf:F:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'a':
      write_ascii = 1;
      fprintf(stdout, "# [] will write in ascii format\n");
      break;
    case 'F':
      if(strcmp(optarg, "Wilson") == 0) {
        fermion_type = _WILSON_FERMION;
      } else if(strcmp(optarg, "tm") == 0) {
        fermion_type = _TM_FERMION;
      } else {
        fprintf(stderr, "[] Error, unrecognized fermion type\n");
        exit(145);
      }
      fprintf(stdout, "# [] will use fermion type %s ---> no. %d\n", optarg, fermion_type);
      break;
    case 'g':
      do_gt = 1;
      fprintf(stdout, "# [] will perform gauge transform\n");
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n");
    usage();
  }

#ifdef OPENMP
  omp_set_num_threads(g_num_threads);
#else
  fprintf(stdout, "[delta_pp_2pt_v4] Warning, resetting global thread number to 1\n");
  g_num_threads = 1;
#endif

  /* initialize MPI parameters */
  mpi_init(argc, argv);

#ifdef OPENMP
  status = fftw_threads_init();
  if(status != 0) {
    fprintf(stderr, "\n[] Error from fftw_init_threads; status was %d\n", status);
    exit(120);
  }
#endif

  /******************************************************
   *
   ******************************************************/
  VOL3 = LX*LY*LZ;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
  fprintf(stdout, "# [%2d] parameters:\n"\
		  "# [%2d] l_LX_at      = %3d\n"\
		  "# [%2d] l_LXstart_at = %3d\n"\
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
    exit(1);
  }

  geometry();

  if(N_Jacobi>0) {

    // alloc the gauge field
    alloc_gauge_field(&g_gauge_field, VOL3);
    switch(g_gauge_file_format) {
      case 0:
        sprintf(gauge_field_filename, "%s.%.4d", gaugefilename_prefix, Nconf);
        break;
      case 1:
        sprintf(gauge_field_filename, "%s.%.5d", gaugefilename_prefix, Nconf);
        break;
    }
  } else {
    g_gauge_field = NULL;
  }


  /*********************************************************************
   * gauge transformation
   *********************************************************************/
  if(do_gt) { init_gauge_trafo(&gauge_trafo, 1.); }

  // determine the source location
  sx0 = g_source_location/(LX*LY*LZ)-Tstart;
  sx1 = (g_source_location%(LX*LY*LZ)) / (LY*LZ);
  sx2 = (g_source_location%(LY*LZ)) / LZ;
  sx3 = (g_source_location%LZ);
//  g_source_time_slice = sx0;
  fprintf(stdout, "# [] source location %d = (%d,%d,%d,%d)\n", g_source_location, sx0, sx1, sx2, sx3);

  // allocate memory for the spinor fields
  g_spinor_field = NULL;
  no_fields = n_s*n_c;
//  if(fermion_type == _TM_FERMION) {
//    no_fields *= 2;
//  }
  if(N_Jacobi>0) no_fields++;
  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  for(i=0; i<no_fields-1; i++) alloc_spinor_field(&g_spinor_field[i], VOL3);
  alloc_spinor_field(&g_spinor_field[no_fields-1], VOL3);
  work = g_spinor_field[no_fields-1];

  spinor_field_checksum = (DML_Checksum*)malloc(no_fields * sizeof(DML_Checksum) );
  if(spinor_field_checksum == NULL ) {
    fprintf(stderr, "[] Error, could not alloc checksums for spinor fields\n");
    exit(73);
  }

  // allocate memory for the contractions
  items = 4* num_component*T;
  bytes = sizeof(double);
  connt = (double*)malloc(items*bytes);
  if(connt == NULL) {
    fprintf(stderr, "\n[] Error, could not alloc connt\n");
    exit(2);
  }
  for(ix=0; ix<items; ix++) connt[ix] = 0.;

  items = num_component * (size_t)VOL3;
  connq = create_sp_field( items );
  if(connq == NULL) {
    fprintf(stderr, "\n[] Error, could not alloc connq\n");
    exit(2);
  }


  /******************************************************
   * initialize FFTW
   ******************************************************/
  items = 2 * num_component * g_sv_dim * g_sv_dim * VOL3;
  bytes = sizeof(double);
  in  = (fftw_complex*)malloc(num_component*g_sv_dim*g_sv_dim*VOL3*sizeof(fftw_complex));
  if(in == NULL) {
    fprintf(stderr, "[] Error, could not malloc in for FFTW\n");
    exit(155);
  }
  dims[0]=LX; dims[1]=LY; dims[2]=LZ;
  //plan_p = fftwnd_create_plan(3, dims, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE);
  plan_p = fftwnd_create_plan_specific(3, dims, FFTW_FORWARD, FFTW_MEASURE, in, num_component*g_sv_dim*g_sv_dim, (fftw_complex*)( connq[0][0] ), num_component*g_sv_dim*g_sv_dim);

  uprop = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) );
  fp1   = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) );
  fp2   = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) );
  fp3   = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) );
  fpaux = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) );
  if(uprop==NULL || fp1==NULL || fp2==NULL || fp3==NULL || fpaux==NULL ) {
    fprintf(stderr, "[] Error, could not alloc fermion propagator points\n");
    exit(57);
  }
  sp1 = (spinor_propagator_type*)malloc(g_num_threads * sizeof(spinor_propagator_type) ); 
  sp2 = (spinor_propagator_type*)malloc(g_num_threads * sizeof(spinor_propagator_type) ); 
  if(sp1==NULL || sp2==NULL) {
    fprintf(stderr, "[] Error, could not alloc spinor propagator points\n");
    exit(59);
  }
  for(i=0;i<g_num_threads;i++) { create_fp(uprop+i); }
  for(i=0;i<g_num_threads;i++) { create_fp(fp1+i); }
  for(i=0;i<g_num_threads;i++) { create_fp(fp2+i); }
  for(i=0;i<g_num_threads;i++) { create_fp(fp3+i); }
  for(i=0;i<g_num_threads;i++) { create_fp(fpaux+i); }
  for(i=0;i<g_num_threads;i++) { create_sp(sp1+i); }
  for(i=0;i<g_num_threads;i++) { create_sp(sp2+i); }

  /******************************************************
   * loop on timeslices
   ******************************************************/
  for(timeslice=0; timeslice<T; timeslice++) {
    append = (int)( timeslice != 0 );

    // read timeslice of the gauge field
    if( N_Jacobi>0) {
      switch(g_gauge_file_format) {
        case 0:
          status = read_lime_gauge_field_doubleprec_timeslice(g_gauge_field, gauge_field_filename, timeslice, &ildg_gauge_field_checksum);
          break;
        case 1:
          status = read_nersc_gauge_field_timeslice(g_gauge_field, gauge_field_filename, timeslice, &nersc_gauge_field_checksum);
          break;
      }
      if(status != 0) {
        fprintf(stderr, "[] Error, could not read gauge field\n");
        exit(21);
      }

#ifdef OPENMP
      status = APE_Smearing_Step_Timeslice_threads(g_gauge_field, N_ape, alpha_ape);
#else
      for(i=0; i<N_ape; i++) { status = APE_Smearing_Step_Timeslice(g_gauge_field, alpha_ape); }
#endif

    }

    // read timeslice of the 12 up-type propagators and smear them
    for(is=0;is<n_s*n_c;is++) {
      if(do_gt == 0) {
        sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix, Nconf, sx0, sx1, sx2, sx3, is);
        status = read_lime_spinor_timeslice(g_spinor_field[is], timeslice, filename, 0, spinor_field_checksum+is);
        if(status != 0) {
          fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename);
          exit(102);
        }
        if(N_Jacobi > 0) {
          fprintf(stdout, "# [] Jacobi smearing propagator no. %d with paramters N_Jacobi=%d, kappa_Jacobi=%f\n",
              is, N_Jacobi, kappa_Jacobi);
#ifdef OPENMP
          Jacobi_Smearing_Step_one_Timeslice_threads(g_gauge_field, g_spinor_field[is], work, N_Jacobi, kappa_Jacobi);
#else
          for(c=0; c<N_Jacobi; c++) {
            Jacobi_Smearing_Step_one_Timeslice(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi);
          }
#endif
        }
      } else {  // of if do_gt == 0
        // apply gt
        apply_gt_prop(gauge_trafo, g_spinor_field[is], is/n_c, is%n_c, 4, filename_prefix, g_source_location);
      } // of if do_gt == 0
    }

    /******************************************************
     * contractions
     ******************************************************/
#ifdef OPENMP
  omp_set_num_threads(g_num_threads);
#pragma omp parallel private (ix,icomp,threadid) \
    firstprivate (fermion_type,gamma_component,num_component,connq,\
        gamma_component_sign,VOL3,g_spinor_field,fp1,fp2,fp3,fpaux,uprop,sp1,sp2)
{
    threadid = omp_get_thread_num();
#else
    threadid = 0;
#endif
    for(ix=threadid; ix<VOL3; ix+=g_num_threads)
    {
      // assign the propagators
      _assign_fp_point_from_field(uprop[threadid], g_spinor_field, ix);
      if(fermion_type == _TM_FERMION) {
        _fp_eq_rot_ti_fp(fp1[threadid], uprop[threadid], +1, fermion_type, fp2[threadid]);
        _fp_eq_fp_ti_rot(uprop[threadid], fp1[threadid], +1, fermion_type, fp2[threadid]);
      }

      for(icomp=0; icomp<num_component; icomp++) {

        _sp_eq_zero( connq[ix*num_component+icomp]);

        /******************************************************
         * prepare propagators
         ******************************************************/
        // fp1[threadid] = C Gamma_1 x S_u = g0 g2 Gamma_1 S_u
        _fp_eq_zero(fp1[threadid]);
        _fp_eq_zero(fpaux[threadid]);
        _fp_eq_gamma_ti_fp(fp1[threadid], gamma_component[0][icomp], uprop[threadid]);
        _fp_eq_gamma_ti_fp(fpaux[threadid], 2, fp1[threadid]);
        _fp_eq_gamma_ti_fp(fp1[threadid], 0, fpaux[threadid]);
        // fp2[threadid] = C Gamma_1 x S_u x C Gamma_2
        _fp_eq_zero(fp2[threadid]);
        _fp_eq_zero(fpaux[threadid]);
        _fp_eq_fp_ti_gamma(fp2[threadid], 0, fp1[threadid]);
        _fp_eq_fp_ti_gamma(fpaux[threadid], 2, fp2[threadid]);
        _fp_eq_fp_ti_gamma(fp2[threadid], gamma_component[1][icomp], fpaux[threadid]);
        // fp3[threadid] = S_u x C Gamma_2 = S_u g0 g2 Gamma_2
        _fp_eq_zero(fp3[threadid]);
        _fp_eq_zero(fpaux[threadid]);
        _fp_eq_fp_ti_gamma(fp3[threadid], 0, uprop[threadid]);
        _fp_eq_fp_ti_gamma(fpaux[threadid], 2, fp3[threadid]);
        _fp_eq_fp_ti_gamma(fp3[threadid], gamma_component[1][icomp], fpaux[threadid]);


        /******************************************************
         * contractions
         ******************************************************/
        // (1)
        // reduce
        _fp_eq_zero(fpaux[threadid]);
        _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp1[threadid], uprop[threadid]);
        // reduce to spin propagator
        _sp_eq_zero( sp1[threadid] );
        _sp_eq_fp_del_contract23_fp(sp1[threadid], fp3[threadid], fpaux[threadid]);
        // (2)
        // reduce to spin propagator
        _sp_eq_zero( sp2[threadid] );
        _sp_eq_fp_del_contract24_fp(sp2[threadid], fp3[threadid], fpaux[threadid]);
        // add and assign
        _sp_pl_eq_sp(sp1[threadid], sp2[threadid]);
        _sp_eq_sp_ti_re(sp2[threadid], sp1[threadid], -gamma_component_sign[icomp]);
        _sp_eq_sp( connq[ix*num_component+icomp], sp2[threadid]);

        // (3)
        // reduce
        _fp_eq_zero(fpaux[threadid]);
        _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp2[threadid], uprop[threadid]);
        // reduce to spin propagator
        _sp_eq_zero( sp1[threadid] );
        _sp_eq_fp_del_contract23_fp(sp1[threadid], uprop[threadid], fpaux[threadid]);
        // (4)
        // reduce
        _fp_eq_zero(fpaux[threadid]);
        _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp1[threadid], fp3[threadid]);
        // reduce to spin propagator
        _sp_eq_zero( sp2[threadid] );
        _sp_eq_fp_del_contract24_fp(sp2[threadid], uprop[threadid], fpaux[threadid]);
        // add and assign
        _sp_pl_eq_sp(sp1[threadid], sp2[threadid]);
        _sp_eq_sp_ti_re(sp2[threadid], sp1[threadid], -gamma_component_sign[icomp]);
        _sp_pl_eq_sp( connq[ix*num_component+icomp], sp2[threadid]);

        // (5)
        // reduce
        _fp_eq_zero(fpaux[threadid]);
        _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp2[threadid], uprop[threadid]);
        // reduce to spin propagator
        _sp_eq_zero( sp1[threadid] );
        _sp_eq_fp_del_contract34_fp(sp1[threadid], uprop[threadid], fpaux[threadid]);
        // (6)
        // reduce
        _fp_eq_zero(fpaux[threadid]);
        _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp1[threadid], fp3[threadid]);
        // reduce to spin propagator
        _sp_eq_zero( sp2[threadid] );
        _sp_eq_fp_del_contract34_fp(sp2[threadid], uprop[threadid], fpaux[threadid]);
        // add and assign
        _sp_pl_eq_sp(sp1[threadid], sp2[threadid]);
        _sp_eq_sp_ti_re(sp2[threadid], sp1[threadid], -gamma_component_sign[icomp]);
        _sp_pl_eq_sp( connq[ix*num_component+icomp], sp2[threadid]);
      }  // of icomp

    }    // of ix
#ifdef OPENMP
}
#endif

    /***********************************************
     * finish calculation of connq
     ***********************************************/
    if(g_propagator_bc_type == 0) {
      // multiply with phase factor
      fprintf(stdout, "# [] multiplying timeslice %d with boundary phase factor\n", timeslice);
      ir = (timeslice - sx0 + T_global) % T_global;
      w1.re = cos( 3. * M_PI*(double)ir / (double)T_global );
      w1.im = sin( 3. * M_PI*(double)ir / (double)T_global );
      for(ix=0;ix<num_component*VOL3;ix++) {
        _sp_eq_sp(sp1[0], connq[ix] );
        _sp_eq_sp_ti_co( connq[ix], sp1[0], w1);
      }
    } else if (g_propagator_bc_type == 1) {
      // multiply with step function
      if(timeslice < sx0) {
        fprintf(stdout, "# [] multiplying timeslice %d with boundary step function\n", timeslice);
        for(ix=0;ix<num_component*VOL3;ix++) {
          _sp_eq_sp(sp1[0], connq[ix] );
          _sp_eq_sp_ti_re( connq[ix], sp1[0], -1.);
        }
      }
    }
  
    if(write_ascii) {
      sprintf(filename, "%s_x.%.4d.t%.2dx%.2dy%.2dz%.2d.ascii", outfile_prefix, Nconf, sx0, sx1, sx2, sx3);
      write_contraction2( connq[0][0], filename, num_component*g_sv_dim*g_sv_dim, VOL3, 1, append);
    }

    /******************************************************************
     * Fourier transform
     ******************************************************************/
    items =  2 * num_component * g_sv_dim * g_sv_dim * VOL3;
    bytes = sizeof(double);

    memcpy(in, connq[0][0], items * bytes);
    ir = num_component * g_sv_dim * g_sv_dim;
#ifdef OPENMP
    fftwnd_threads(g_num_threads, plan_p, ir, in, ir, 1, (fftw_complex*)(connq[0][0]), ir, 1);
#else
    fftwnd(plan_p, ir, in, ir, 1, (fftw_complex*)(connq[0][0]), ir, 1);
#endif

    // add phase factor from the source location
    iix = 0;
    for(x1=0;x1<LX;x1++) {
      q[0] = (double)x1 / (double)LX;
    for(x2=0;x2<LY;x2++) {
      q[1] = (double)x2 / (double)LY;
    for(x3=0;x3<LZ;x3++) {
      q[2] = (double)x3 / (double)LZ;
      phase = 2. * M_PI * ( q[0]*sx1 + q[1]*sx2 + q[2]*sx3 );
      w1.re = cos(phase);
      w1.im = sin(phase);

      for(icomp=0; icomp<num_component; icomp++) {
        _sp_eq_sp(sp1[0], connq[iix] );
        _sp_eq_sp_ti_co( connq[iix], sp1[0], w1) ;
        iix++; 
      }
    }}}  // of x3, x2, x1

    // write to file
    sprintf(filename, "%s_q.%.4d.t%.2dx%.2dy%.2dz%.2d", outfile_prefix, Nconf, sx0, sx1, sx2, sx3);
    sprintf(contype, "2-pt. function, (t,q_1,q_2,q_3)-dependent, source_timeslice = %d", sx0);
    write_lime_contraction_timeslice(connq[0][0], filename, 64, num_component*g_sv_dim*g_sv_dim, contype, Nconf, 0, &connq_checksum, timeslice);

    if(write_ascii) {
      strcat(filename, ".ascii");
      write_contraction2(connq[0][0],filename, num_component*g_sv_dim*g_sv_dim, VOL3, 1, append);
    }


    /***********************************************
     * calculate connt
     ***********************************************/
    for(icomp=0;icomp<num_component; icomp++) {
      // fwd
      _sp_eq_sp(sp1[0], connq[icomp]);
      _sp_eq_gamma_ti_sp(sp2[0], 0, sp1[0]);
      _sp_pl_eq_sp(sp1[0], sp2[0]);
      _co_eq_tr_sp(&w, sp1[0]);
      connt[2*(icomp*T + timeslice)  ] = w.re * 0.25;
      connt[2*(icomp*T + timeslice)+1] = w.im * 0.25;
      // bwd
      _sp_eq_sp(sp1[0], connq[icomp]);
      _sp_eq_gamma_ti_sp(sp2[0], 0, sp1[0]);
      _sp_mi_eq_sp(sp1[0], sp2[0]);
      _co_eq_tr_sp(&w, sp1[0]);
      connt[2*(icomp*T+timeslice + num_component*T)  ] = w.re * 0.25;
      connt[2*(icomp*T+timeslice + num_component*T)+1] = w.im * 0.25;
    }

  }  // of loop on timeslice



  // write connt
  sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.fw", outfile_prefix, Nconf, sx0, sx1, sx2, sx3);
  ofs = fopen(filename, "w");
  if(ofs == NULL) {
    fprintf(stderr, "[] Error, could not open file %s for writing\n", filename);
    exit(3);
  }
  fprintf(ofs, "#%12.8f%3d%3d%3d%3d%8.4f%6d\n", g_kappa, T_global, LX, LY, LZ, g_mu, Nconf);

  for(icomp=0; icomp<num_component; icomp++) {
    ir = sx0;
    fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], 0, connt[2*(icomp*T+ir)], 0., Nconf);
    for(it=1;it<T/2;it++) {
      ir  = ( it + sx0 ) % T_global;
      ir2 = ( (T_global - it) + sx0 ) % T_global;
      fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(icomp*T+ir)], connt[2*(icomp*T+ir2)], Nconf);
    }
    ir = ( it + sx0 ) % T_global;
    fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(icomp*T+ir)], 0., Nconf);
  }
  fclose(ofs);

  sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.bw", outfile_prefix, Nconf, sx0, sx1, sx2, sx3);
  ofs = fopen(filename, "w");
  if(ofs == NULL) {
    fprintf(stderr, "[] Error, could not open file %s for writing\n", filename);
    exit(3);
  }
  fprintf(ofs, "#%12.8f%3d%3d%3d%3d%8.4f%6d\n", g_kappa, T_global, LX, LY, LZ, g_mu, Nconf);

  for(icomp=0; icomp<num_component; icomp++) {
    ir = sx0;
    fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], 0, connt[2*(num_component*T+icomp*T+ir)], 0., Nconf);
    for(it=1;it<T/2;it++) {
      ir  = ( it + sx0 ) % T_global;
      ir2 = ( (T_global - it) + sx0 ) % T_global;
      fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(num_component*T+icomp*T+ir)], connt[2*(num_component*T+icomp*T+ir2)], Nconf);
    }
    ir = ( it + sx0 ) % T_global;
    fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(num_component*T+icomp*T+ir)], 0., Nconf);
  }
  fclose(ofs);

  /***********************************************
   * free the allocated memory, finalize
   ***********************************************/
  free_geometry();
  if(connt!= NULL) free(connt);
  if(connq!= NULL) free(connq);
  if(gauge_trafo != NULL) free(gauge_trafo);

  if(g_spinor_field!=NULL) {
    for(i=0; i<no_fields; i++) free(g_spinor_field[i]);
    free(g_spinor_field); g_spinor_field=(double**)NULL;
  }
  if(spinor_field_checksum !=NULL) free(spinor_field_checksum);
  if(g_gauge_field != NULL) free(g_gauge_field);

  for(i=0;i<g_num_threads;i++) { free_fp(uprop+i); }
  for(i=0;i<g_num_threads;i++) { free_fp(fp1+i); }
  for(i=0;i<g_num_threads;i++) { free_fp(fp2+i); }
  for(i=0;i<g_num_threads;i++) { free_fp(fp3+i); }
  for(i=0;i<g_num_threads;i++) { free_fp(fpaux+i); }
  for(i=0;i<g_num_threads;i++) { free_sp(sp1+i); }
  for(i=0;i<g_num_threads;i++) { free_sp(sp2+i); }
  if(uprop!=NULL) free(uprop);
  if(fp1!=NULL) free(fp1);
  if(fp2!=NULL) free(fp2);
  if(fp3!=NULL) free(fp3);
  if(fpaux!=NULL) free(fpaux);
  if(sp1!=NULL) free(sp1);
  if(sp2!=NULL) free(sp2);

  free(in);
  fftwnd_destroy_plan(plan_p);

  g_the_time = time(NULL);
  fprintf(stdout, "# [] %s# [] end fo run\n", ctime(&g_the_time));
  fflush(stdout);
  fprintf(stderr, "# [] %s# [] end fo run\n", ctime(&g_the_time));
  fflush(stderr);

#ifdef MPI
  MPI_Finalize();
#endif
  return(0);
}
Example #30
0
//-------------------------------------------------------------------
int fdct_wrapping(int N1, int N2, int nbscales, int nbangles_coarse, int allcurvelets, CpxNumMat& x, vector< vector<CpxNumMat> >& c)
{
  //---------------------------------------------
  assert(N1==x.m() && N2==x.n());
  
  int F1 = N1/2;  int F2 = N2/2;
  // ifft original data
  CpxNumMat T(x);
  fftwnd_plan p = fftw2d_create_plan(N2, N1, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
  fftwnd_one(p, (fftw_complex*)T.data(), NULL);
  fftwnd_destroy_plan(p);
  double sqrtprod = sqrt(double(N1*N2));
  for(int j=0; j<N2; j++)	 for(int i=0; i<N1; i++)		T(i,j) /= sqrtprod;
  CpxOffMat O(N1, N2);
  fdct_wrapping_fftshift(T, O);
  
  //-----------------------------------------------------------------------------
  vector<CpxOffMat> Xhghs;
  Xhghs.resize(nbscales);
  CpxOffMat X;
  //unfold or not
  if(allcurvelets==1) {
	 //--------------------------
	 double XL1 = 4.0*N1/3.0;  double XL2 = 4.0*N2/3.0; //range
	 int XS1, XS2;  int XF1, XF2;  double XR1, XR2;  fdct_wrapping_rangecompute(XL1, XL2, XS1, XS2, XF1, XF2, XR1, XR2);
	 IntOffVec t1(XS1);
	 for(int i=-XF1; i<-XF1+XS1; i++)		if(     i<-N1/2) t1(i) = i+int(N1);		else if(i>(N1-1)/2) t1(i) = i-int(N1);		else t1(i) = i;
	 IntOffVec t2(XS2);
	 for(int i=-XF2; i<-XF2+XS2; i++)		if(     i<-N2/2) t2(i) = i+int(N2);		else if(i>(N2-1)/2) t2(i) = i-int(N2);		else t2(i) = i;
	 X.resize(XS1, XS2);
	 for(int j=-XF2; j<-XF2+XS2; j++)
		for(int i=-XF1; i<-XF1+XS1; i++)
		  X(i,j) = O(t1(i), t2(j));
	 DblOffMat lowpass(XS1,XS2);
	 fdct_wrapping_lowpasscompute(XL1, XL2, lowpass); //compute the low pass filter
	 for(int j=-XF2; j<-XF2+XS2; j++)
		for(int i=-XF1; i<-XF1+XS1; i++)
		  X(i,j) *= lowpass(i,j);
  } else {
	 //--------------------------
	 X = O;
  }
  //separate
  double XL1 = 4.0*N1/3.0;  double XL2 = 4.0*N2/3.0; //range
  for(int sc=nbscales-1; sc>0; sc--) {
	 double XL1n = XL1/2;	 double XL2n = XL2/2;
	 int XS1n, XS2n;  int XF1n, XF2n;  double XR1n, XR2n;
	 fdct_wrapping_rangecompute(XL1n, XL2n, XS1n, XS2n, XF1n, XF2n, XR1n, XR2n);
	 //computer filter
	 DblOffMat lowpass(XS1n, XS2n);
	 fdct_wrapping_lowpasscompute(XL1n, XL2n, lowpass);
	 DblOffMat hghpass(XS1n, XS2n);
	 for(int j=-XF2n; j<-XF2n+XS2n; j++)
		for(int i=-XF1n; i<-XF1n+XS1n; i++)
		  hghpass(i,j) = sqrt(1-lowpass(i,j)*lowpass(i,j));
	 //separate
	 CpxOffMat Xhgh(X);
	 for(int j=-XF2n; j<-XF2n+XS2n; j++)
		for(int i=-XF1n; i<-XF1n+XS1n; i++)
		  Xhgh(i,j) *= hghpass(i,j);
	 CpxOffMat Xlow(XS1n, XS2n);
	 for(int j=-XF2n; j<-XF2n+XS2n; j++)
		for(int i=-XF1n; i<-XF1n+XS1n; i++)
		  Xlow(i,j) = X(i,j) * lowpass(i,j);
	 //store and prepare for next level
	 Xhghs[sc] = Xhgh;
	 X = Xlow;
	 XL1 = XL1/2;	 XL2 = XL2/2;
  }
  Xhghs[0] = X;
  
  //-----------------------------------------------------------------------------
  vector<int> nbangles(nbscales);
  if(allcurvelets==1) {
	 //nbangles
	 nbangles[0] = 1;
	 for(int sc=1; sc<nbscales; sc++)		nbangles[sc] = nbangles_coarse * pow2( int(ceil(double(sc-1)/2)) );
	 //c
	 c.resize(nbscales);
	 for(int sc=0; sc<nbscales; sc++)		c[sc].resize( nbangles[sc] );
	 
	 double XL1 = 4.0*N1/3.0;  double XL2 = 4.0*N2/3.0; //range
	 for(int sc=nbscales-1; sc>0; sc--) {
		fdct_wrapping_sepangle(XL1, XL2, nbangles[sc], Xhghs[sc], c[sc]);
		XL1 /= 2;		XL2 /= 2;
	 }
	 fdct_wrapping_wavelet(Xhghs[0], c[0]);
  } else {
	 //nbangles
	 nbangles[0] = 1;
	 for(int sc=1; sc<nbscales-1; sc++)		nbangles[sc] = nbangles_coarse * pow2( int(ceil(double(sc-1)/2)) );
	 nbangles[nbscales-1] = 1;
	 //c
	 c.resize(nbscales);
	 for(int sc=0; sc<nbscales; sc++)		c[sc].resize( nbangles[sc] );
	 
	 fdct_wrapping_wavelet(Xhghs[nbscales-1], c[nbscales-1]);
	 double XL1 = 2.0*N1/3.0;  double XL2 = 2.0*N2/3.0; //range
	 for(int sc=nbscales-2; sc>0; sc--) {
		fdct_wrapping_sepangle(XL1, XL2, nbangles[sc], Xhghs[sc], c[sc]);
		XL1 /= 2;		XL2 /= 2;
	 }
	 fdct_wrapping_wavelet(Xhghs[0], c[0]);
  }
  
  return 0;
}