Esempio n. 1
0
void testnd_in_place(int rank, int *n, fftw_direction dir,
		     fftwnd_plan validated_plan,
		     int alternate_api, int specific, int force_buffered)
{
     int istride;
     int N, dim, i;
     fftw_complex *in1, *in2, *out2;
     fftwnd_plan p;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     if (force_buffered)
	  flags |= FFTWND_FORCE_BUFFERED;

     N = 1;
     for (dim = 0; dim < rank; ++dim)
	  N *= n[dim];

     in1 = (fftw_complex *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_complex));
     in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     if (!specific) {
	  if (alternate_api && (rank == 2 || rank == 3)) {
	       if (rank == 2)
		    p = fftw2d_create_plan(n[0], n[1], dir, flags);
	       else
		    p = fftw3d_create_plan(n[0], n[1], n[2], dir, flags);
	  } else		/* standard api */
	       p = fftwnd_create_plan(rank, n, dir, flags);
     } else {			/* specific plan creation */
	  if (alternate_api && (rank == 2 || rank == 3)) {
	       if (rank == 2)
		    p = fftw2d_create_plan_specific(n[0], n[1], dir, flags,
						    in1, 1,
					       (fftw_complex *) NULL, 1);
	       else
		    p = fftw3d_create_plan_specific(n[0], n[1], n[2], dir, flags,
						    in1, 1,
					       (fftw_complex *) NULL, 1);
	  } else		/* standard api */
	       p = fftwnd_create_plan_specific(rank, n, dir, flags,
					       in1, 1,
					       (fftw_complex *) NULL, 1);

     }

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
	  /* 
	   * generate random inputs */
	  for (i = 0; i < N; ++i) {
	       int j;
	       c_re(in2[i]) = DRAND();
	       c_im(in2[i]) = DRAND();
	       for (j = 0; j < istride; ++j) {
		    c_re(in1[i * istride + j]) = c_re(in2[i]);
		    c_im(in1[i * istride + j]) = c_im(in2[i]);
	       }
	  }

	  if (istride != 1 || istride != 1 || coinflip())
	       fftwnd_threads(nthreads, p, istride, in1, istride, 1,
			      (fftw_complex *) NULL, 1, 1);
	  else
	       fftwnd_threads_one(nthreads, p, in1, NULL);

	  fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1);

	  for (i = 0; i < istride; ++i)
	       CHECK(compute_error_complex(in1 + i, istride, out2, 1, N) < TOLERANCE,
		     "testnd_in_place: wrong answer");
     }

     fftwnd_destroy_plan(p);

     fftw_free(out2);
     fftw_free(in2);
     fftw_free(in1);
}
Esempio n. 2
0
void test_speed_nd_aux(struct size sz,
		       fftw_direction dir, int flags, int specific)
{
     fftw_complex *in;
     fftwnd_plan plan;
     double t, t0;
     fftw_time begin, end;
     int i, N;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;	

     N = 1;
     for (i = 0; i < sz.rank; ++i)
	  N *= (sz.narray[i]);

     in = (fftw_complex *) fftw_malloc(N * howmany_fields *
				       sizeof(fftw_complex));

     if (specific) {
	  begin = fftw_get_time();
	  plan = fftwnd_create_plan_specific(sz.rank, sz.narray, dir,
					     speed_flag | flags
					     | wisdom_flag | no_vector_flag,
					     in, howmany_fields, 0, 1);
     } else {
	  begin = fftw_get_time();
	  plan = fftwnd_create_plan(sz.rank, sz.narray,
				    dir, speed_flag | flags
				    | wisdom_flag | no_vector_flag);
     }
     end = fftw_get_time();
     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, printf("\n"));
     WHEN_VERBOSE(2, (fftwnd_print_plan(plan)));
     WHEN_VERBOSE(2, printf("\n"));

     FFTW_TIME_FFT(fftwnd(plan, howmany_fields,
			  in, howmany_fields, 1, 0, 0, 0),
		   in, N * howmany_fields, t0);

     FFTW_TIME_FFT(fftwnd_threads(nthreads, plan, howmany_fields,
				  in, howmany_fields, 1, 0, 0, 0),
		   in, N * howmany_fields, t);

     fftwnd_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n", smart_sprint_time(t0)));
     WHEN_VERBOSE(1, printf("time for one fft (%d threads): %s", nthreads, smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5 (N log2 N) / (t in microseconds)"
			    " = %f\n", howmany_fields * mflops(t, N)));
     WHEN_VERBOSE(1, printf("parallel speedup: %f\n", t0 / t));

     fftw_free(in);

     WHEN_VERBOSE(1, printf("\n"));
}
Esempio n. 3
0
int main(int argc, char **argv) {
  
  int c, i, mu, nu;
  int count = 0;
  int filename_set = 0;
  int dims[4]      = {0,0,0,0};
  int l_LX_at, l_LXstart_at;
  int x0, x1, x2, x3, ix, iix;
  int sid, status;
  double *disc = (double*)NULL;
  double *data = (double*)NULL;
  double *bias = (double*)NULL;
  double *work = (double*)NULL;
  double q[4], fnorm;
  int verbose = 0;
  char filename[100], contype[200];
  double ratime, retime;
  double plaq; 
  double spinor1[24], spinor2[24], U_[18];
  complex w, w1, *cp1, *cp2, *cp3, *cp4;

  fftw_complex *in=(fftw_complex*)NULL;

#ifdef MPI
  fftwnd_mpi_plan plan_p, plan_m;
#else
  fftwnd_plan plan_p, plan_m;
#endif

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "h?vf:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa <= 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.\n");
    usage();
  }

  if(hpe_order%2==0 && hpe_order>0) {
    if(g_proc_id==0) fprintf(stdout, "HPE order should be odd\n");
    usage();
  }

  fprintf(stdout, "\n**************************************************\n"\
                  "* vp_disc_hpe_stoch_subtract with HPE of order %d\n"\
                  "**************************************************\n\n", hpe_order);

  /*********************************
   * initialize MPI parameters 
   *********************************/
  mpi_init(argc, argv);

  /* initialize fftw */
  dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ;
#ifdef MPI
  plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE);
  plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE);
  fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME);
#else
  plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE);
  plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD,  FFTW_MEASURE | FFTW_IN_PLACE);
  T            = T_global;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
#endif
  fprintf(stdout, "# [%2d] fftw parameters:\n"\
                  "# [%2d] T            = %3d\n"\
		  "# [%2d] Tstart       = %3d\n"\
		  "# [%2d] l_LX_at      = %3d\n"\
		  "# [%2d] l_LXstart_at = %3d\n"\
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

#ifdef MPI
  if(T==0) {
    fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id);
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
    exit(101);
  }
#endif

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(102);
  }

  geometry();

  /************************************************
   * read the gauge field, measure the plaquette 
   ************************************************/
  alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND);
  sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf);
  if(g_cart_id==0) fprintf(stdout, "# reading gauge field from file %s\n", filename);
  read_lime_gauge_field_doubleprec(filename);
  xchange_gauge();

  plaquette(&plaq);
  if(g_cart_id==0) fprintf(stdout, "# measured plaquette value: %25.16e\n", plaq);

  /****************************************
   * allocate memory for the spinor fields
   ****************************************/
  no_fields = 3;
  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND);

  /****************************************
   * allocate memory for the contractions
   ****************************************/
  disc  = (double*)calloc(16*VOLUME, sizeof(double));
  if( disc == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for disc\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(103);
  }

  data = (double*)calloc(16*VOLUME, sizeof(double));
  if( data== (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for data\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(104);
  }
  for(ix=0; ix<16*VOLUME; ix++) data[ix] = 0.;

  work  = (double*)calloc(32*VOLUME, sizeof(double));
  if( work == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for work\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(105);
  }

  bias  = (double*)calloc(32*VOLUME, sizeof(double));
  if( bias == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for bias\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(106);
  }
  for(ix=0; ix<32*VOLUME; ix++) bias[ix] = 0.;

  /****************************************
   * prepare Fourier transformation arrays
   ****************************************/
  in  = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex));
  if(in==(fftw_complex*)NULL) {    
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(107);
  }

  /***********************************************
   * start loop on source id.s 
   ***********************************************/
  for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) {
    for(ix=0; ix<16*VOLUME; ix++) disc[ix] = 0.;

    /* read the new propagator */
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(format==0) {
      sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid);
      if(read_lime_spinor(g_spinor_field[2], filename, 0) != 0) break;
    }
    else if(format==1) {
      sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid);
      if(read_cmi(g_spinor_field[2], filename) != 0) break;
    }
    xchange_field(g_spinor_field[2]);
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "# time to read prop.: %e seconds\n", retime-ratime);

    count++;

    /************************************************
     * calculate the source: apply Q_phi_tbc 
     ************************************************/
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    Q_phi_tbc(g_spinor_field[0], g_spinor_field[2]);
    xchange_field(g_spinor_field[0]); 
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "# time to calculate source: %e seconds\n", retime-ratime);

#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    /************************************************
     * HPE: apply BH to order hpe_order+2 
     ************************************************/
    if(hpe_order>0) {
      BHn(g_spinor_field[1], g_spinor_field[2], hpe_order+2);
    } else {
      memcpy((void*)g_spinor_field[1], (void*)g_spinor_field[2], 24*VOLUMEPLUSRAND*sizeof(double));
    }

    /************************************************
     * add new contractions to (existing) disc
     ************************************************/
    for(mu=0; mu<4; mu++) { 
      iix = _GWI(mu,0,VOLUME);
      for(ix=0; ix<VOLUME; ix++) {    
        _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]);

        _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_mi_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2);
	disc[iix  ] = -0.5 * w.re;
	disc[iix+1] = -0.5 * w.im;
	data[iix  ] -= 0.5 * w.re;
	data[iix+1] -= 0.5 * w.im;

	_fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_pl_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2);
	disc[iix  ] -= 0.5 * w.re;
	disc[iix+1] -= 0.5 * w.im;
	data[iix  ] -= 0.5 * w.re;
	data[iix+1] -= 0.5 * w.im;

	iix += 2;
      }
    }
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "# time to contract cvc: %e seconds\n", retime-ratime);
 
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    for(mu=0; mu<4; mu++) {
      memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_m, in, NULL);
#endif
      memcpy((void*)(disc+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));

      memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_p, in, NULL);
#endif
      memcpy((void*)(disc+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));
    }  /* of mu =0 ,..., 3*/

    for(mu=0; mu<4; mu++) {
    for(nu=0; nu<4; nu++) {
      cp1 = (complex*)(disc+_GWI(mu,     0,VOLUME));
      cp2 = (complex*)(disc+_GWI(4+nu,   0,VOLUME));
      cp3 = (complex*)(bias+_GWI(4*mu+nu,0,VOLUME));
      for(ix=0; ix<VOLUME; ix++) {
        _co_eq_co_ti_co(&w1, cp1, cp2);
        cp3->re += w1.re;
        cp3->im += w1.im;
	cp1++; cp2++; cp3++;
      }
    }}
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "# time for Fourier trafo and adding to bias: %e seconds\n", 
      retime-ratime);
  }  /* of loop on sid */

  /************************************************
   * save results for count == Nsave 
   ************************************************/
  if(count==Nsave) {

    if(g_cart_id == 0) fprintf(stdout, "# save results for count = %d\n", count);

    for(ix=0; ix<16*VOLUME; ix++) disc[ix] = 0.;

    if(hpe_order>0) {
      sprintf(filename, "vp_disc_hpe%.2d_loops_X.%.4d", hpe_order, Nconf);
      if(g_cart_id==0) fprintf(stdout, "# reading loop part from file %s\n", filename);
      if( (status = read_lime_contraction(disc, filename, 4, 0)) != 0 ) {
#ifdef MPI
        MPI_Abort(MPI_COMM_WORLD, 1);
        MPI_Finalize();
#endif
        exit(108);
      }
    }


    /* save the result in position space */
    fnorm = 1. / ( (double)count * g_prop_normsqr );
    if(g_cart_id==0) fprintf(stdout, "# X-fnorm = %e\n", fnorm);
    for(mu=0; mu<4; mu++) {
      for(ix=0; ix<VOLUME; ix++) {
        work[_GWI(mu,ix,VOLUME)  ] = data[_GWI(mu,ix,VOLUME)  ] * fnorm + disc[_GWI(mu,ix,VOLUME)  ];
        work[_GWI(mu,ix,VOLUME)+1] = data[_GWI(mu,ix,VOLUME)+1] * fnorm + disc[_GWI(mu,ix,VOLUME)+1];
      }
    }
    sprintf(filename, "vp_disc_hpe%.2d_subtracted_X.%.4d.%.4d", hpe_order, Nconf, count);
    sprintf(contype, "cvc-disc-hpe-loops-%2d-to-%2d-stoch-subtracted-X", hpe_order, hpe_order+2);
    write_lime_contraction(work, filename, 64, 4, contype, Nconf, count);
/*
    sprintf(filename, "vp_disc_hpe%.2d_subtracted_X.%.4d.%.4d.ascii", hpe_order, Nconf, count);
    write_contraction(work, NULL, filename, 4, 2, 0);
*/

#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    for(mu=0; mu<4; mu++) {
      memcpy((void*)in, (void*)(data+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_m, in, NULL);
#endif
      memcpy((void*)(data+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));

      memcpy((void*)in, (void*)(data+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_p, in, NULL);
#endif
      memcpy((void*)(data+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));
    }  

    fnorm = 1. / ( g_prop_normsqr*g_prop_normsqr * (double)count * (double)(count-1) );
    if(g_cart_id==0) fprintf(stdout, "# P-fnorm for purely stochastic part = %e\n", fnorm);
    for(mu=0; mu<4; mu++) {
    for(nu=0; nu<4; nu++) {
      cp1 = (complex*)(data+_GWI(mu,     0,VOLUME));
      cp2 = (complex*)(data+_GWI(4+nu,   0,VOLUME));
      cp3 = (complex*)(work+_GWI(4*mu+nu,0,VOLUME));
      cp4 = (complex*)(bias+_GWI(4*mu+nu,0,VOLUME)); 
      for(ix=0; ix<VOLUME; ix++) {
        _co_eq_co_ti_co(&w1, cp1, cp2);
        cp3->re = ( w1.re - cp4->re ) * fnorm;
        cp3->im = ( w1.im - cp4->im ) * fnorm;
        cp1++; cp2++; cp3++; cp4++;
      }
    }}
  
    for(mu=0; mu<4; mu++) {
      memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_m, in, NULL);
#endif
      memcpy((void*)(disc+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));

      memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_p, in, NULL);
#endif
      memcpy((void*)(disc+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));
    }
 
    fnorm = 1. / ( g_prop_normsqr * (double)count );
    if(g_cart_id==0) fprintf(stdout, "# P-fnorm for mixed stochastic-loop part = %e\n", fnorm);
    for(mu=0; mu<4; mu++) {
    for(nu=0; nu<4; nu++) {
      cp1 = (complex*)(data + _GWI(mu,     0,VOLUME));
      cp2 = (complex*)(disc + _GWI(4+nu,   0,VOLUME));
      cp3 = (complex*)(work + _GWI(4*mu+nu,0,VOLUME));
      for(ix=0; ix<VOLUME; ix++) {
        _co_eq_co_ti_co(&w1, cp1, cp2);
        cp3->re += w1.re * fnorm;
        cp3->im += w1.im * fnorm;
        cp1++; cp2++; cp3++;
      }

      cp1 = (complex*)(disc + _GWI(mu,     0,VOLUME));
      cp2 = (complex*)(data + _GWI(4+nu,   0,VOLUME));
      cp3 = (complex*)(work + _GWI(4*mu+nu,0,VOLUME));
      for(ix=0; ix<VOLUME; ix++) {
        _co_eq_co_ti_co(&w1, cp1, cp2);
        cp3->re += w1.re * fnorm;
        cp3->im += w1.im * fnorm;
        cp1++; cp2++; cp3++;
      }
    }}

    fnorm = 1. / ( (double)T_global * (double)(LX*LY*LZ) );
    if(g_cart_id==0) fprintf(stdout, "# P-fnorm for final estimator (1/T/V) = %e\n", fnorm);
    for(mu=0; mu<4; mu++) {
    for(nu=0; nu<4; nu++) {
      cp1 = (complex*)(disc + _GWI(mu,     0,VOLUME));
      cp2 = (complex*)(disc + _GWI(4+nu,   0,VOLUME));
      cp3 = (complex*)(work + _GWI(4*mu+nu,0,VOLUME));
      for(x0=0; x0<T; x0++) {
        q[0] = (double)(x0+Tstart) / (double)T_global;
      for(x1=0; x1<LX; x1++) {
        q[1] = (double)x1 / (double)LX;
      for(x2=0; x2<LY; x2++) {
        q[2] = (double)x2 / (double)LY;
      for(x3=0; x3<LZ; x3++) {
        q[3] = (double)x3 / (double)LZ;
        ix = g_ipt[x0][x1][x2][x3];
        w.re = cos(M_PI * ( q[mu] - q[nu] ) );
        w.im = sin(M_PI * ( q[mu] - q[nu] ) );
        _co_eq_co_ti_co(&w1, cp1, cp2);
        cp3->re += w1.re;
        cp3->im += w1.im;
        _co_eq_co_ti_co(&w1, cp3, &w);
        cp3->re = w1.re * fnorm;
        cp3->im = w1.im * fnorm;
        cp1++; cp2++; cp3++;
      }}}}
    }}

    sprintf(filename, "vp_disc_hpe%.2d_subtracted_P.%.4d.%.4d", hpe_order, Nconf, count);
    sprintf(contype, "cvc-disc-hpe-loops-%2d-to-%2d-stoch-subtracted-P", hpe_order, hpe_order+2);
    write_lime_contraction(work, filename, 64, 16, contype, Nconf, count);
/*
    sprintf(filename, "vp_disc_hpe%.2d_subtracted_P.%.4d.%.4d.ascii", hpe_order, Nconf, count);
    write_contraction(work, NULL, filename, 16, 2, 0);
*/
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "# time to save cvc results: %e seconds\n", retime-ratime);
  }  /* of if count == Nsave */

  /***********************************************
   * free the allocated memory, finalize 
   ***********************************************/
  free(g_gauge_field);
  for(i=0; i<no_fields; i++) free(g_spinor_field[i]);
  free(g_spinor_field);
  free_geometry();
  fftw_free(in);
  free(disc);
  free(bias);
  free(data);
  free(work);
#ifdef MPI
  fftwnd_mpi_destroy_plan(plan_p);
  fftwnd_mpi_destroy_plan(plan_m);
  MPI_Finalize();
#else
  fftwnd_destroy_plan(plan_p);
  fftwnd_destroy_plan(plan_m);
#endif
  return(0);
}
Esempio n. 4
0
int main(int argc, char **argv) {
  
  int c, i, mu, nu;
  int count        = 0;
  int filename_set = 0;
  int dims[4]      = {0,0,0,0};
  int l_LX_at, l_LXstart_at;
  int x0, x1, x2, x3, ix, iix;
  int sid;
  double *disc  = (double*)NULL;
  double *work = (double*)NULL;
  double q[4], fnorm;
  int verbose = 0;
  int do_gt   = 0;
  char filename[100], contype[200];
  double ratime, retime;
  double plaq;
  double spinor1[24], spinor2[24], U_[18];
  complex w, w1, *cp1, *cp2, *cp3;
  FILE *ofs;

  fftw_complex *in=(fftw_complex*)NULL;

#ifdef MPI
  fftwnd_mpi_plan plan_p, plan_m;
  int *status;
#else
  fftwnd_plan plan_p, plan_m;
#endif

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "h?vgf:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'g':
      do_gt = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n");
    usage();
  }

  /* initialize MPI parameters */
  mpi_init(argc, argv);
#ifdef MPI
  if((status = (int*)calloc(g_nproc, sizeof(int))) == (int*)NULL) {
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
    exit(7);
  }
#endif

  /* initialize fftw */
  dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ;
#ifdef MPI
  plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE);
  plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE);
  fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME);
#else
  plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE);
  plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD,  FFTW_MEASURE | FFTW_IN_PLACE);
  T            = T_global;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
#endif
  fprintf(stdout, "# [%2d] fftw parameters:\n"\
                  "# [%2d] T            = %3d\n"\
		  "# [%2d] Tstart       = %3d\n"\
		  "# [%2d] l_LX_at      = %3d\n"\
		  "# [%2d] l_LXstart_at = %3d\n"\
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

#ifdef MPI
  if(T==0) {
    fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id);
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
    exit(2);
  }
#endif

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(1);
  }

  geometry();

  /* read the gauge field */
  alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND);
  sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf);
  if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename);
  read_lime_gauge_field_doubleprec(filename);
#ifdef MPI
  xchange_gauge();
#endif

  /* measure the plaquette */
  plaquette(&plaq);
  if(g_cart_id==0) fprintf(stdout, "measured plaquette value: %25.16e\n", plaq);

  /* allocate memory for the spinor fields */
  no_fields = 3;
  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND);

  /* allocate memory for the contractions */
  disc  = (double*)calloc( 8*VOLUME, sizeof(double));
  if( disc == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for disc\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }
  for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.;

  work  = (double*)calloc(48*VOLUME, sizeof(double));
  if( work == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for work\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }

  /* prepare Fourier transformation arrays */
  in  = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex));
  if(in==(fftw_complex*)NULL) {    
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(4);
  }

  /***********************************************
   * start loop on source id.s 
   ***********************************************/
  for(sid=g_sourceid; sid<=g_sourceid2; sid++) {

    /* read the new propagator */
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(format==0) {
      sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid);
      if(read_lime_spinor(g_spinor_field[2], filename, 0) != 0) break;
    }
    else if(format==1) {
      sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid);
      if(read_cmi(g_spinor_field[2], filename) != 0) break;
    }
    xchange_field(g_spinor_field[2]);
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    fprintf(stdout, "time to read prop.: %e seconds\n", retime-ratime);

    count++;

    /************************************************
     * calculate the source: apply Q_phi_tbc 
     ************************************************/
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    Q_phi_tbc(g_spinor_field[0], g_spinor_field[2]);
    xchange_field(g_spinor_field[0]); 
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "time to calculate source: %e seconds\n", retime-ratime);

    /************************************************
     * HPE: apply BH3 
     ************************************************/
    BH3(g_spinor_field[1], g_spinor_field[2]);

    /* add new contractions to (existing) disc */
#  ifdef MPI
    ratime = MPI_Wtime();
#  else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#  endif
    for(mu=0; mu<4; mu++) { /* loop on Lorentz index of the current */
      iix = _GWI(mu,0,VOLUME);
      for(ix=0; ix<VOLUME; ix++) {    /* loop on lattice sites */
        _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]);

        /* first contribution */
        _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_mi_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2);
	disc[iix  ] -= 0.5 * w.re;
	disc[iix+1] -= 0.5 * w.im;

        /* second contribution */
	_fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_pl_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2);
	disc[iix  ] -= 0.5 * w.re;
	disc[iix+1] -= 0.5 * w.im;

	iix += 2;
      }  /* of ix */
    }    /* of mu */

#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "[%2d] time to contract cvc: %e seconds\n", g_cart_id, retime-ratime);


    /************************************************
     * save results for count = multiple of Nsave 
     ************************************************/
    if(count%Nsave == 0) {
      if(g_cart_id == 0) fprintf(stdout, "save results for count = %d\n", count);

      /* save the result in position space */
      fnorm = 1. / ( (double)count * g_prop_normsqr );
      if(g_cart_id==0) fprintf(stdout, "# X-fnorm = %e\n", fnorm);
      for(mu=0; mu<4; mu++) {
        for(ix=0; ix<VOLUME; ix++) {
          work[_GWI(mu,ix,VOLUME)  ] = disc[_GWI(mu,ix,VOLUME)  ] * fnorm;
          work[_GWI(mu,ix,VOLUME)+1] = disc[_GWI(mu,ix,VOLUME)+1] * fnorm;
        }
      }
      sprintf(filename, "cvc_hpe_X.%.4d.%.4d", Nconf, count);
      sprintf(contype, "cvc_disc_hpe_loops_3_to_3_stoch_X");
      write_lime_contraction(work, filename, 64, 4, contype, Nconf, count);

#ifdef MPI
      ratime = MPI_Wtime();
#else
      ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
      /* Fourier transform data, copy to work */
      for(mu=0; mu<4; mu++) {
        memcpy((void*)in, (void*)(work+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
        fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
        fftwnd_one(plan_m, in, NULL);
#endif
        memcpy((void*)(work+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));

        memcpy((void*)in, (void*)(work+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
        fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
        fftwnd_one(plan_p, in, NULL);
#endif
        memcpy((void*)(work+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));
      }  /* of mu =0 ,..., 3*/

      fnorm = 1. / (double)(T_global*LX*LY*LZ);
      if(g_cart_id==0) fprintf(stdout, "# P-fnorm = %e\n", fnorm);
      for(mu=0; mu<4; mu++) {
      for(nu=0; nu<4; nu++) {
        cp1 = (complex*)(work+_GWI(mu,0,VOLUME));
        cp2 = (complex*)(work+_GWI(4+nu,0,VOLUME));
        cp3 = (complex*)(work+_GWI(8+4*mu+nu,0,VOLUME));
     
        for(x0=0; x0<T; x0++) {
	  q[0] = (double)(x0+Tstart) / (double)T_global;
        for(x1=0; x1<LX; x1++) {
	  q[1] = (double)(x1) / (double)LX;
        for(x2=0; x2<LY; x2++) {
	  q[2] = (double)(x2) / (double)LY;
        for(x3=0; x3<LZ; x3++) {
	  q[3] = (double)(x3) / (double)LZ;
	  ix = g_ipt[x0][x1][x2][x3];
	  w.re = cos( M_PI * (q[mu]-q[nu]) );
	  w.im = sin( M_PI * (q[mu]-q[nu]) );
	  _co_eq_co_ti_co(&w1, cp1, cp2);
	  _co_eq_co_ti_co(cp3, &w1, &w);
	  _co_ti_eq_re(cp3, fnorm);
	  cp1++; cp2++; cp3++;
	}}}}
      }}
  
      /* save the result in momentum space */
      sprintf(filename, "cvc_hpe_P.%.4d.%.4d", Nconf, count);
      sprintf(contype, "cvc_disc_hpe_loops_3_to_3_stoch_P");
      write_lime_contraction(work+_GWI(8,0,VOLUME), filename, 64, 16, contype, Nconf, count);

#ifdef MPI
      retime = MPI_Wtime();
#else
      retime = (double)clock() / CLOCKS_PER_SEC;
#endif
      if(g_cart_id==0) fprintf(stdout, "time to save cvc results: %e seconds\n", retime-ratime);

    }  /* of count % Nsave == 0 */
  }  /* of loop on sid */

  /***********************************************
   * free the allocated memory, finalize 
   ***********************************************/
  free(g_gauge_field);
  for(i=0; i<no_fields; i++) free(g_spinor_field[i]);
  free(g_spinor_field);
  free_geometry();
  fftw_free(in);
  free(disc);

  free(work);

#ifdef MPI
  fftwnd_mpi_destroy_plan(plan_p);
  fftwnd_mpi_destroy_plan(plan_m);
  free(status);
  MPI_Finalize();
#else
  fftwnd_destroy_plan(plan_p);
  fftwnd_destroy_plan(plan_m);
#endif

  return(0);

}
Esempio n. 5
0
int main(int argc, char **argv) {
  
  int c, i, mu, nu;
  int count        = 0;
  int filename_set = 0;
  int dims[4]      = {0,0,0,0};
  int l_LX_at, l_LXstart_at;
  int x0, x1, x2, x3, ix, iix;
  int sx0, sx1, sx2, sx3;
  int sid;
  double *disc  = (double*)NULL;
  double *disc2 = (double*)NULL;
  double *work = (double*)NULL;
  double q[4], fnorm;
  double cvc_lnuy[8];
  double *gauge_trafo=(double*)NULL;
  double unit_trace[2], D_trace[2];
  int verbose = 0;
  int do_gt   = 0;
  char filename[100];
  double ratime, retime;
  double plaq;
  double spinor1[24], spinor2[24], U_[18];
  complex w, w1, *cp1, *cp2, *cp3;
  FILE *ofs;

  fftw_complex *in=(fftw_complex*)NULL;

#ifdef MPI
  fftwnd_mpi_plan plan_p;
  int *status;
#else
  fftwnd_plan plan_p;
#endif

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "h?vgf:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'g':
      do_gt = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  set_default_input_values();
  if(filename_set==0) strcpy(filename, "cvc.input");

  /* read the input file */
  read_input(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n");
    usage();
  }

  /* initialize MPI parameters */
  mpi_init(argc, argv);
#ifdef MPI
  if((status = (int*)calloc(g_nproc, sizeof(int))) == (int*)NULL) {
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
    exit(7);
  }
#endif

  /* initialize fftw */
  dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ;
#ifdef MPI
  plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE);
  fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME);
#else
  plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE);
  T            = T_global;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
#endif
  fprintf(stdout, "# [%2d] fftw parameters:\n"\
                  "# [%2d] T            = %3d\n"\
		  "# [%2d] Tstart       = %3d\n"\
		  "# [%2d] l_LX_at      = %3d\n"\
		  "# [%2d] l_LXstart_at = %3d\n"\
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

#ifdef MPI
  if(T==0) {
    fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id);
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
    exit(2);
  }
#endif

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(1);
  }

  geometry();

  /* read the gauge field */
  alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND);
  sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf);
  if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename);
  read_lime_gauge_field_doubleprec(filename);
  xchange_gauge();

  /* measure the plaquette */
  plaquette(&plaq);
  if(g_cart_id==0) fprintf(stdout, "measured plaquette value: %25.16e\n", plaq);

  /* get the source location coordinates */
  sx0 =   g_source_location / (LX*LY*LZ  );
  sx1 = ( g_source_location % (LX*LY*LZ) ) / (LY*LZ);
  sx2 = ( g_source_location % (LY*LZ)    ) / LZ;
  sx3 = ( g_source_location %  LZ        );

  /* read the data for lnuy */
  sprintf(filename, "cvc_lnuy_X.%.4d", Nconf);
  ofs = fopen(filename, "r");
  fprintf(stdout, "reading cvc lnuy from file %s\n", filename);
  for(mu=0; mu<4; mu++) {
    fscanf(ofs, "%lf%lf", cvc_lnuy+2*mu, cvc_lnuy+2*mu+1);
  }
  fclose(ofs);

  /* allocate memory for the spinor fields */
  no_fields = 2;
  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND);

  /****************************************
   * allocate memory for the contractions 
   ****************************************/
  disc  = (double*)calloc(8*VOLUME, sizeof(double));
  if( disc == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for disc\n");
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(3);
  }
  for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.;

  disc2 = (double*)calloc(8*VOLUME, sizeof(double));
  if( disc2 == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for disc2\n");
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(3);
  }
  for(ix=0; ix<8*VOLUME; ix++) disc2[ix] = 0.;

  work  = (double*)calloc(48*VOLUME, sizeof(double));
  if( work == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for work\n");
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(3);
  }

  /****************************************
   * prepare Fourier transformation arrays
   ****************************************/
  in  = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex));
  if(in==(fftw_complex*)NULL) {    
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(4);
  }

  if(g_resume==1) { /* read current disc from file */
    sprintf(filename, ".outcvc_current.%.4d", Nconf);
    c = read_contraction(disc, &count, filename, 8);

#ifdef MPI
    MPI_Gather(&c, 1, MPI_INT, status, 1, MPI_INT, 0, g_cart_grid);
    if(g_cart_id==0) {
      /* check the entries in status */
      for(i=0; i<g_nproc; i++) 
        if(status[i]!=0) { status[0] = 1; break; }
    }
    MPI_Bcast(status, 1, MPI_INT, 0, g_cart_grid);
    if(status[0]==1) {
      for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.;
      count = 0;
    }
#else
    if(c != 0) {
      fprintf(stdout, "could not read current disc; start new\n");
      for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.;
      count = 0;
    }
#endif
    if(g_cart_id==0) fprintf(stdout, "starting with count = %d\n", count);
  }  /* of g_resume ==  1 */

  if(do_gt==1) {
    /***********************************
     * initialize gauge transformation 
     ***********************************/
    init_gauge_trafo(&gauge_trafo,1.0);
    fprintf(stdout, "applying gauge trafo to gauge field\n");
    apply_gt_gauge(gauge_trafo);
     plaquette(&plaq);
     if(g_cart_id==0) fprintf(stdout, "plaquette plaq = %25.16e\n", plaq);
  } 
  unit_trace[0] = 0.;
  unit_trace[1] = 0.;
  D_trace[0] = 0.;
  D_trace[1] = 0.;
  
  /****************************************
   * start loop on source id.s
   ****************************************/
  for(sid=g_sourceid; sid<=g_sourceid2; sid++) {

    /****************************************
     * read the new propagator
     ****************************************/
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif

    /****************************************
     * check: write source before D-appl.
     ****************************************/
/*
    if(format==0) {
      sprintf(filename, "%s.%.4d.%.2d", filename_prefix, Nconf, sid);
      read_lime_spinor(g_spinor_field[0], filename, 0);
    }
    for(ix=0; ix<12*VOLUME; ix++) {
      fprintf(stdout, "source: %6d%25.16e%25.16e\n", ix, g_spinor_field[0][2*ix], g_spinor_field[0][2*ix+1]);
    }
*/

    if(format==0) {
      sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid);
      /* sprintf(filename, "%s.%.4d.%.2d", filename_prefix, Nconf, sid); */
      if(read_lime_spinor(g_spinor_field[1], filename, 0) != 0) break;
    }
    else if(format==1) {
      sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid);
      if(read_cmi(g_spinor_field[1], filename) != 0) break;
    }
    xchange_field(g_spinor_field[1]);

    if(do_gt==1) {
      fprintf(stdout, "applying gt on propagators\n");
      for(ix=0; ix<VOLUME; ix++) {
        _fv_eq_cm_ti_fv(spinor1, gauge_trafo+18*ix, g_spinor_field[1]+_GSI(ix));
        _fv_eq_fv(g_spinor_field[1]+_GSI(ix), spinor1);
      }
    }

#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    fprintf(stdout, "time to read prop.: %e seconds\n", retime-ratime);

    count++;

    /****************************************
     * calculate the source: apply Q_phi_tbc
     ****************************************/
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    Q_phi_tbc(g_spinor_field[0], g_spinor_field[1]);
    xchange_field(g_spinor_field[0]); 
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "time to calculate source: %e seconds\n", retime-ratime);

    /****************************************
     * check: write source after D-appl.
     ****************************************/
/*
     for(ix=0; ix<12*VOLUME; ix++) {
       fprintf(stdout, "D_source: %6d%25.16e%25.16e\n", ix, g_spinor_field[0][2*ix], g_spinor_field[0][2*ix+1]);
     }
*/

    /****************************************
     * add new contractions to (existing) disc
     ****************************************/
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    for(mu=0; mu<4; mu++) { /* loop on Lorentz index of the current */
      iix = _GWI(mu,0,VOLUME);
      for(ix=0; ix<VOLUME; ix++) {    /* loop on lattice sites */
        _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]);

        /* first contribution */
        _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_mi_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2);
	disc[iix  ] -= 0.5 * w.re;
	disc[iix+1] -= 0.5 * w.im;

        /* second contribution */
	_fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_pl_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2);
	disc2[iix  ] -= 0.5 * w.re;
	disc2[iix+1] -= 0.5 * w.im;

        iix += 2;
      }  /* of ix */
    }    /* of mu */

#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    fprintf(stdout, "[%2d] contractions for CVC in %e seconds\n", g_cart_id, retime-ratime);

    /***************************************************
     * check: convergence of trace of unit matrix
     ***************************************************/
     _co_eq_fv_dag_ti_fv(&w, g_spinor_field[0]+_GSI(g_source_location), g_spinor_field[0]+_GSI(g_source_location));
     unit_trace[0] += w.re;
     unit_trace[1] += w.im;
     fprintf(stdout, "unit_trace: %4d%25.16e%25.16e\n", count, w.re, w.im);
     _co_eq_fv_dag_ti_fv(&w, g_spinor_field[0]+_GSI(g_source_location), g_spinor_field[0]+_GSI(g_iup[g_source_location][0]));
     fprintf(stdout, "shift_trace: %4d%25.16e%25.16e\n", count, w.re, w.im);

    /***************************************************
     * check: convergence of trace D_u(source_location, source_location)
     ***************************************************/
     Q_phi_tbc(g_spinor_field[1], g_spinor_field[0]);
     _co_eq_fv_dag_ti_fv(&w, g_spinor_field[0]+_GSI(g_source_location), g_spinor_field[1]+_GSI(g_source_location));
     D_trace[0] += w.re;
     D_trace[1] += w.im;
/*     fprintf(stdout, "D_trace: %4d%25.16e%25.16e\n", count, D_trace[0]/(double)count, D_trace[1]/(double)count); */
     fprintf(stdout, "D_trace: %4d%25.16e%25.16e\n", count, w.re, w.im); 


    /***************************************************
     * save results for count = multiple of Nsave 
     ***************************************************/
    if(count%Nsave == 0) {

      if(g_cart_id == 0) fprintf(stdout, "save results for count = %d\n", count);

      /* save the result in position space */

      /* divide by number of propagators */
      for(ix=0; ix<8*VOLUME; ix++) work[ix] = disc[ix]  / (double)count;
      sprintf(filename, "outcvc_Xm.%.4d.%.4d", Nconf, count);
      write_contraction(work, NULL, filename, 4, 2, 0);
      for(ix=0; ix<8*VOLUME; ix++) work[ix] = disc2[ix] / (double)count;
      sprintf(filename, "outcvc_Xp.%.4d.%.4d", Nconf, count);
      write_contraction(work, NULL, filename, 4, 2, 0);
      for(ix=0; ix<8*VOLUME; ix++) work[ix] = (disc[ix] + disc2[ix]) / (double)count;
      sprintf(filename, "outcvc_X.%.4d.%.4d", Nconf, count);
      write_contraction(work, NULL, filename, 4, 2, 0);

#ifdef MPI
      ratime = MPI_Wtime();
#else
      ratime = (double)clock() / CLOCKS_PER_SEC;
#endif

      /****************************************
       * Fourier transform data, copy to work
       ****************************************/
      for(mu=0; mu<4; mu++) {
        memcpy((void*)in, (void*)(work+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
        fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
        fftwnd_one(plan_p, in, NULL);
#endif
        memcpy((void*)(work+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));
      }  /* of mu =0 ,..., 3*/

      /* fnorm = 1. / ((double)count); */
      fprintf(stdout, "fnorm = %e\n", fnorm);
      for(mu=0; mu<4; mu++) {
      for(nu=0; nu<4; nu++) {
        cp1 = (complex*)(work+_GWI(mu,0,VOLUME));
        cp2 = (complex*)(cvc_lnuy+2*nu);
        cp3 = (complex*)(work+_GWI(4+4*mu+nu,0,VOLUME));
     
        for(x0=0; x0<T; x0++) {
	  q[0] = (double)(x0+Tstart) / (double)T_global;
        for(x1=0; x1<LX; x1++) {
	  q[1] = (double)(x1) / (double)LX;
        for(x2=0; x2<LY; x2++) {
	  q[2] = (double)(x2) / (double)LY;
        for(x3=0; x3<LZ; x3++) {
	  q[3] = (double)(x3) / (double)LZ;
	  ix = g_ipt[x0][x1][x2][x3];
	  w.re = cos( M_PI * ( q[mu] - q[nu] - 2.*(sx0*q[0]+sx1*q[1]+sx2*q[2]+sx3*q[3])) );
	  w.im = sin( M_PI * ( q[mu] - q[nu] - 2.*(sx0*q[0]+sx1*q[1]+sx2*q[2]+sx3*q[3])) );
/*          fprintf(stdout, "mu=%3d, nu=%3d, t=%3d, x=%3d, y=%3d, z=%3d, phase= %21.12e + %21.12ei\n", \
              mu, nu, x0, x1, x2, x3, w.re, w.im); */
	  _co_eq_co_ti_co(&w1, cp1, cp2);
	  _co_eq_co_ti_co(cp3, &w1, &w);
          /* _co_ti_eq_re(cp3, fnorm); */
	  cp1++; cp3++;
	}
	}
	}
	}

      }
      }
  
      /* save the result in momentum space */
      sprintf(filename, "outcvc_P.%.4d.%.4d", Nconf, count);
      write_contraction(work+_GWI(4,0,VOLUME), NULL, filename, 16, 2, 0);

#ifdef MPI
      retime = MPI_Wtime();
#else
      retime = (double)clock() / CLOCKS_PER_SEC;
#endif
      if(g_cart_id==0) fprintf(stdout, "time to cvc save results: %e seconds\n", retime-ratime);

    }  /* of count % Nsave == 0 */

  }  /* of loop on sid */

  if(g_resume==1) {
    /* write current disc to file */
    sprintf(filename, ".outcvc_current.%.4d", Nconf);
    write_contraction(disc, &count, filename, 4, 0, 0);

  }

  /**************************************
   * free the allocated memory, finalize
   **************************************/
  free(g_gauge_field);
  if(do_gt==1) free(gauge_trafo);
  for(i=0; i<no_fields; i++) free(g_spinor_field[i]);
  free(g_spinor_field);
  free_geometry();
  fftw_free(in);
  free(disc); free(disc2);
  free(work);
#ifdef MPI
  fftwnd_mpi_destroy_plan(plan_p);
  free(status);
  MPI_Finalize();
#else
  fftwnd_destroy_plan(plan_p);
#endif

  return(0);

}
Esempio n. 6
0
int main(int argc, char **argv) {
  
  int c, i, mu, nu;
  int count        = 0;
  int filename_set = 0;
  int dims[4]      = {0,0,0,0};
  int l_LX_at, l_LXstart_at;
  int x0, x1, x2, x3, ix;
  int sid;
  double *disc = (double*)NULL;
  double *work = (double*)NULL;
  double *disc_diag = (double*)NULL;
  double phase[4];
  int verbose = 0;
  int do_gt   = 0;
  char filename[100];
  double ratime, retime;
  double plaq;
  double spinor1[24], spinor2[24], U_[18];
  complex w, w1, psi1[4], psi2[4];
  FILE *ofs;

  fftw_complex *in=(fftw_complex*)NULL;

#ifdef MPI
  fftwnd_mpi_plan plan_p, plan_m;
  int *status;
#else
  fftwnd_plan plan_p, plan_m;
#endif

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "h?vgf:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'g':
      do_gt = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  set_default_input_values();
  if(filename_set==0) strcpy(filename, "cvc.input");

  /* read the input file */
  read_input(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n");
    usage();
  }

  /* initialize MPI parameters */
  mpi_init(argc, argv);
#ifdef MPI
  if((status = (int*)calloc(g_nproc, sizeof(int))) == (int*)NULL) {
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
    exit(7);
  }
#endif

  /* initialize fftw */
  dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ;
#ifdef MPI
  plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE);
  plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE);
  fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME);
#else
  plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE);
  plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD,  FFTW_MEASURE | FFTW_IN_PLACE);
  T            = T_global;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
#endif
  fprintf(stdout, "# [%2d] fftw parameters:\n"\
                  "# [%2d] T            = %3d\n"\
		  "# [%2d] Tstart       = %3d\n"\
		  "# [%2d] l_LX_at      = %3d\n"\
		  "# [%2d] l_LXstart_at = %3d\n"\
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

#ifdef MPI
  if(T==0) {
    fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id);
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
    exit(2);
  }
#endif

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(1);
  }

  geometry();

  /* read the gauge field */
  alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND);
  sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf);
  if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename);
  read_lime_gauge_field_doubleprec(filename);
#ifdef MPI
  xchange_gauge();
#endif

  /* measure the plaquette */
  plaquette(&plaq);
  if(g_cart_id==0) fprintf(stdout, "measured plaquette value: %25.16e\n", plaq);

  /* allocate memory for the spinor fields */
  no_fields = 2;
  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND);

  /* allocate memory for the contractions */
  disc = (double*)calloc(8*VOLUME, sizeof(double));
  work = (double*)calloc(20*VOLUME, sizeof(double));
  if( (disc==(double*)NULL) || (work==(double*)NULL) ) {
    fprintf(stderr, "could not allocate memory for disc/work\n");
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(3);
  }
  for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.;

  if(g_subtract == 1) {
    /* allocate memory for disc_diag */
    disc_diag = (double*)calloc(20*VOLUME, sizeof(double));
    if( disc_diag == (double*)NULL ) {
      fprintf(stderr, "could not allocate memory for disc_diag\n");
#ifdef MPI
      MPI_Abort(MPI_COMM_WORLD, 1);
      MPI_Finalize();
#endif
      exit(8);
    }
    for(ix=0; ix<20*VOLUME; ix++) disc_diag[ix] = 0.;
  }

  /* prepare Fourier transformation arrays */
  in  = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex));
  if(in==(fftw_complex*)NULL) {    
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(4);
  }

  if(g_resume==1) { /* read current disc from file */
    sprintf(filename, ".outcvc_current.%.4d", Nconf);
    c = read_contraction(disc, &count, filename, 4);
    if( (g_subtract==1) && (c==0) ) {
      sprintf(filename, ".outcvc_diag_current.%.4d", Nconf);
      c = read_contraction(disc_diag, (int*)NULL, filename, 10);
    }
#ifdef MPI
    MPI_Gather(&c, 1, MPI_INT, status, 1, MPI_INT, 0, g_cart_grid);
    if(g_cart_id==0) {
      /* check the entries in status */
      for(i=0; i<g_nproc; i++) 
        if(status[i]!=0) { status[0] = 1; break; }
    }
    MPI_Bcast(status, 1, MPI_INT, 0, g_cart_grid);
    if(status[0]==1) {
      for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.;
      count = 0;
    }
#else
    if(c != 0) {
      fprintf(stdout, "could not read current disc; start new\n");
      for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.;
      if(g_subtract==1) for(ix=0; ix<20*VOLUME; ix++) disc_diag[ix] = 0.;
      count = 0;
    }
#endif
    if(g_cart_id==0) fprintf(stdout, "starting with count = %d\n", count);
  }  /* of g_resume ==  1 */
  
  /* start loop on source id.s */
  for(sid=g_sourceid; sid<=g_sourceid2; sid++) {

    /* read the new propagator */
/*    sprintf(filename, "%s.%.4d.%.2d", filename_prefix, Nconf, sid); */
    sprintf(filename, "source.%.4d.%.2d.inverted", Nconf, sid);
    if(format==0) {
      if(read_lime_spinor(g_spinor_field[1], filename, 0) != 0) break;
    }
    else if(format==1) {
      if(read_cmi(g_spinor_field[1], filename) != 0) break;
    }
    count++;
    
    xchange_field(g_spinor_field[1]); 

    /* calculate the source: apply Q_phi_tbc */
    Q_phi_tbc(g_spinor_field[0], g_spinor_field[1]);
    xchange_field(g_spinor_field[0]); 

/*
    sprintf(filename, "%s.source.%.2d", filename, g_cart_id);
    ofs = fopen(filename, "w");
    printf_spinor_field(g_spinor_field[0], ofs);
    fclose(ofs);
*/

    /* add new contractions to (existing) disc */
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    for(ix=0; ix<VOLUME; ix++) {    /* loop on lattice sites */
      for(mu=0; mu<4; mu++) { /* loop on Lorentz index of the current */

        _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]);

        /* first contribution */
        _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_mi_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2);
	disc[_GJI(ix, mu)  ] -= 0.25 * w.re;
	disc[_GJI(ix, mu)+1] -= 0.25 * w.im;
        if(g_subtract==1) {
	  work[_GWI(mu,ix,VOLUME)  ] = -0.25 * w.re;
	  work[_GWI(mu,ix,VOLUME)+1] = -0.25 * w.im;
	}

        /* second contribution */
	_fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_pl_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2);
	disc[_GJI(ix, mu)  ] -= 0.25 * w.re;
	disc[_GJI(ix, mu)+1] -= 0.25 * w.im;
        if(g_subtract==1) {
	  work[_GWI(mu,ix,VOLUME)  ] -= 0.25 * w.re;
	  work[_GWI(mu,ix,VOLUME)+1] -= 0.25 * w.im;
	}
      }
    }
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    fprintf(stdout, "[%2d] contractions in %e seconds\n", g_cart_id, retime-ratime);

    if(g_subtract==1) {
      /* add current contribution to disc_diag */
      for(mu=0; mu<4; mu++) {
        for(i=0; i<4; i++) phase[i] = (double)(i==mu);
        memcpy((void*)in, (void*)&work[_GWI(mu,0,VOLUME)], 2*VOLUME*sizeof(double));
#ifdef MPI
        fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
        fftwnd_one(plan_m, in, NULL);
#endif
        for(x0=0; x0<T; x0++) {
        for(x1=0; x1<LX; x1++) {
        for(x2=0; x2<LY; x2++) {
        for(x3=0; x3<LZ; x3++) {
	  ix = g_ipt[x0][x1][x2][x3];
	  w.re =  cos( M_PI * 
	    (phase[0]*(double)(Tstart+x0)/(double)T_global + phase[1]*(double)x1/(double)LX + 
	     phase[2]*(double)x2/(double)LY                + phase[3]*(double)x3/(double)LZ) );
	  w.im = -sin( M_PI * 
	    (phase[0]*(double)(Tstart+x0)/(double)T_global + phase[1]*(double)x1/(double)LX + 
	     phase[2]*(double)x2/(double)LY                + phase[3]*(double)x3/(double)LZ) );
	  _co_eq_co_ti_co(&w1, &in[ix], &w);
	  work[_GWI(4+mu,ix,VOLUME)  ] = w1.re;
	  work[_GWI(4+mu,ix,VOLUME)+1] = w1.im;
	}
	}
	}
	}

        memcpy((void*)in, (void*)&work[_GWI(mu,0,VOLUME)], 2*VOLUME*sizeof(double));
#ifdef MPI
        fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
        fftwnd_one(plan_p, in, NULL);
#endif
        for(x0=0; x0<T; x0++) {
        for(x1=0; x1<LX; x1++) {
        for(x2=0; x2<LY; x2++) {
        for(x3=0; x3<LZ; x3++) {
	  ix = g_ipt[x0][x1][x2][x3];
	  w.re = cos( M_PI * 
	    (phase[0]*(double)(Tstart+x0)/(double)T_global + phase[1]*(double)x1/(double)LX + 
	     phase[2]*(double)x2/(double)LY                + phase[3]*(double)x3/(double)LZ) );
	  w.im = sin( M_PI * 
	    (phase[0]*(double)(Tstart+x0)/(double)T_global + phase[1]*(double)x1/(double)LX + 
	     phase[2]*(double)x2/(double)LY                + phase[3]*(double)x3/(double)LZ) );
	  _co_eq_co_ti_co(&w1, &in[ix], &w);
	  work[_GWI(mu,ix,VOLUME)  ] = w1.re;
	  work[_GWI(mu,ix,VOLUME)+1] = w1.im;
	}
	}
	}
	}
      }  /* of mu */
      for(ix=0; ix<VOLUME; ix++) {
        i=-1;
        for(mu=0; mu<4; mu++) {
        for(nu=mu; nu<4; nu++) {
	  i++;
	  _co_eq_co_ti_co(&w, (complex*)&work[_GWI(mu,ix,VOLUME)], (complex*)&work[_GWI(4+nu,ix,VOLUME)]);
	  disc_diag[_GWI(ix,i,10)  ] += w.re;
	  disc_diag[_GWI(ix,i,10)+1] += w.im;
	}
	}
      }
    } /* of g_subtract == 1 */

    /* save results for count = multiple of Nsave */
    if(count%Nsave == 0) {
#ifdef MPI
      ratime = MPI_Wtime();
#else
      ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
      if(g_cart_id == 0) fprintf(stdout, "save results for count = %d\n", count);

      /* save the result in position space */
      sprintf(filename, "outcvc_X.%.4d.%.4d", Nconf, count);
      write_contraction(disc, NULL, filename, 4, 1, 0);

      /* Fourier transform data, copy to work */
      for(mu=0; mu<4; mu++) {
        for(i=0; i<4; i++) phase[i] = (double)(i==mu);
        for(ix=0; ix<VOLUME; ix++) {
	  in[ix].re = disc[_GJI(ix,mu)  ];
	  in[ix].im = disc[_GJI(ix,mu)+1];
	}
#ifdef MPI
        fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
        fftwnd_one(plan_m, in, NULL);
#endif
        for(x0=0; x0<T; x0++) {
        for(x1=0; x1<LX; x1++) {
        for(x2=0; x2<LY; x2++) {
        for(x3=0; x3<LZ; x3++) {
	  ix = g_ipt[x0][x1][x2][x3];
	  w.re =  cos( M_PI * 
	    (phase[0]*(double)(Tstart+x0)/(double)T_global + phase[1]*(double)x1/(double)LX + 
	     phase[2]*(double)x2/(double)LY                + phase[3]*(double)x3/(double)LZ) );
	  w.im = -sin( M_PI * 
	    (phase[0]*(double)(Tstart+x0)/(double)T_global + phase[1]*(double)x1/(double)LX + 
	     phase[2]*(double)x2/(double)LY                + phase[3]*(double)x3/(double)LZ) );
	  _co_eq_co_ti_co(&w1, &in[ix], &w);
	  work[_GWI(ix,4+mu,8)  ] = w1.re / (double)count;
	  work[_GWI(ix,4+mu,8)+1] = w1.im / (double)count;
	}
	}
	}
	}

        for(ix=0; ix<VOLUME; ix++) {
	  in[ix].re = disc[_GJI(ix, mu)  ];
	  in[ix].im = disc[_GJI(ix, mu)+1];
	}
#ifdef MPI
        fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
        fftwnd_one(plan_p, in, NULL);
#endif
        for(x0=0; x0<T; x0++) {
        for(x1=0; x1<LX; x1++) {
        for(x2=0; x2<LY; x2++) {
        for(x3=0; x3<LZ; x3++) {
	  ix = g_ipt[x0][x1][x2][x3];
	  w.re = cos( M_PI * 
	    (phase[0]*(double)(Tstart+x0)/(double)T_global + phase[1]*(double)x1/(double)LX + 
	     phase[2]*(double)x2/(double)LY                + phase[3]*(double)x3/(double)LZ) );
	  w.im = sin( M_PI * 
	    (phase[0]*(double)(Tstart+x0)/(double)T_global + phase[1]*(double)x1/(double)LX + 
	     phase[2]*(double)x2/(double)LY                + phase[3]*(double)x3/(double)LZ) );
	  _co_eq_co_ti_co(&w1, &in[ix], &w);
	  work[_GWI(ix,mu,8)  ] = w1.re / (double)count;
	  work[_GWI(ix,mu,8)+1] = w1.im / (double)count;
	}
	}
	}
	}
	 
      }  /* of mu =0 ,..., 3*/

      /* save the result in momentum space */
      sprintf(filename, "outcvc_P.%.4d.%.4d", Nconf, count);
      write_contraction(work, NULL, filename, 8, 1, 0);

      /* calculate the correlations 00, 01, 02, 03, 11, 12, ..., 23, 33 */
      for(ix=VOLUME-1; ix>=0; ix--) {
        /* copy current data to auxilliary vector */
	memcpy((void*)psi1, (void*)&work[_GWI(ix,0,8)], 8*sizeof(double));
	memcpy((void*)psi2, (void*)&work[_GWI(ix,4,8)], 8*sizeof(double));
	i = -1;
        for(mu=0; mu<4; mu++) {
        for(nu=mu; nu<4; nu++) {
	  i++;
          _co_eq_co_ti_co(&w,&psi1[mu],&psi2[nu]);
	  if(g_subtract !=1 ) {
            work[_GWI(ix,i,10)  ] = w.re / (double)(T_global*LX*LY*LZ);
            work[_GWI(ix,i,10)+1] = w.im / (double)(T_global*LX*LY*LZ);
	  }
	  else {
	    work[_GWI(ix,i,10)  ] =
	      ( w.re - disc_diag[_GWI(ix,i,10)  ]/(double)(count*count) ) / 
	        (double)(T_global*LX*LY*LZ);
	    work[_GWI(ix,i,10)+1] =
	      ( w.im - disc_diag[_GWI(ix,i,10)+1]/(double)(count*count) ) / 
	        (double)(T_global*LX*LY*LZ);
	  }
        }
	}
      }
 
      /* save current results to file */
      sprintf(filename, "outcvc_final.%.4d.%.4d", Nconf, count);
      write_contraction(work, (int*)NULL, filename, 10, 1, 0);
#ifdef MPI
      retime = MPI_Wtime();
#else
      retime = (double)clock() / CLOCKS_PER_SEC;
#endif
      fprintf(stdout, "[%2d] time to save results: %e seconds\n", g_cart_id, retime-ratime);
    }  /* of count % Nsave == 0 */

  }  /* of loop on sid */

  /* write current disc to file */
  sprintf(filename, ".outcvc_current.%.4d", Nconf);
  write_contraction(disc, &count, filename, 4, 0, 0);

  if(g_subtract == 1) {
    /* write current disc_diag to file */
    sprintf(filename, ".outcvc_diag_current.%.4d", Nconf);
    write_contraction(disc_diag, (int*)NULL, filename, 10, 0, 0);
  }

  /* free the allocated memory, finalize */
  free(g_gauge_field); g_gauge_field=(double*)NULL;
  for(i=0; i<no_fields; i++) {
    free(g_spinor_field[i]);
    g_spinor_field[i] = (double*)NULL;
  }
  free(g_spinor_field); g_spinor_field=(double**)NULL;
  free_geometry();
  fftw_free(in);
  free(disc);
  free(work);
  if(g_subtract==1) free(disc_diag);
#ifdef MPI
  fftwnd_mpi_destroy_plan(plan_p);
  fftwnd_mpi_destroy_plan(plan_m);
  free(status);
  MPI_Finalize();
#else
  fftwnd_destroy_plan(plan_p);
  fftwnd_destroy_plan(plan_m);
#endif

  return(0);

}