Example #1
0
// Application entry point.
// Registers the "D3D Tutorial" window class, creates a 300x300 window at
// (100,100), runs InitD3D() and init_geometry(), then pumps messages and
// calls Render() whenever the queue is empty until WM_QUIT arrives.
// Always returns 0, including when set-up fails.
INT WINAPI WinMain( HINSTANCE hInst, HINSTANCE, LPSTR, INT )
{
    WNDCLASSEX wc = { sizeof(WNDCLASSEX), CS_CLASSDC, MsgProc, 0L, 0L, 
                      GetModuleHandle(NULL), NULL, NULL, NULL, NULL,
                      "D3D Tutorial", NULL };

    // Without a registered class there is nothing to create or unregister.
    if ( !RegisterClassEx( &wc ) )
        return 0;

    HWND hWnd = CreateWindow( "D3D Tutorial", "D3D Tutorial 04", 
                              WS_OVERLAPPEDWINDOW,
                              100, 100, 300, 300,
                              GetDesktopWindow(), NULL, wc.hInstance, NULL );

    // Do not hand a NULL window handle to the initialisation routines.
    if ( hWnd == NULL )               goto MAIN_END;
    if (FAILED( InitD3D(hWnd) ))      goto MAIN_END;
    if (FAILED( init_geometry() ))    goto MAIN_END;

    ShowWindow( hWnd, SW_SHOWDEFAULT );
    UpdateWindow( hWnd );

    MSG msg;
    ZeroMemory( &msg, sizeof(msg) );
    while ( msg.message != WM_QUIT ) {
        if ( PeekMessage( &msg, NULL, 0, 0, PM_REMOVE ) ) {
            TranslateMessage( &msg );
            DispatchMessage( &msg );
        }
        else {
            // Queue is empty: use the idle time to draw a frame.
            Render();
        }
    }

MAIN_END:
    UnregisterClass( "D3D Tutorial", wc.hInstance );

    return 0;
}
Example #2
0
/*
 * Creates one shape with new_taurus(30, 15, 30, 30), hands stdout to
 * flushOBJ(), then releases the shape and shuts the geometry module down.
 */
int main()
{
  shape_t *torus;

  init_geometry();

  torus = new_taurus(30, 15, 30, 30);
  flushOBJ(stdout);

  /* release in reverse order of creation */
  free_shape(torus);
  finalize_geometry();

  return 0;
}
Example #3
0
/*
 * Builds a profile shape from the first `numpoints` entries of `pt`
 * (y is flipped against WINDOW_HEIGHT, z is 0), passes it to
 * new_revolution() with DIVISION_NUMBER, and calls flushOBJ(file_out).
 *
 * Both the profile and the shape returned by new_revolution() are released
 * with free_shape() before finalize_geometry(), matching the other
 * revolution example in this file.
 */
void to_revolution()
{
  init_geometry();
  shape_t *profile = new_shape();
  for(unsigned i = 0; i < numpoints; ++i) {
    add_vertex(profile, new_vertex(pt[i].x, WINDOW_HEIGHT-pt[i].y, 0));
  }
  shape_t *solid = new_revolution(profile, DIVISION_NUMBER);
  flushOBJ(file_out);
  free_shape(profile);
  free_shape(solid);
  finalize_geometry();
}
Example #4
0
/*
 * Appends a fixed nine-point outline to a new shape, passes it to
 * new_revolution() with 60 as second argument, calls flushOBJ(stdout),
 * then frees both shapes and finalizes the geometry module.
 */
int main()
{
  /* (x, y) of each outline point in insertion order; z is always 0 */
  static const int outline[][2] = {
    {   0, 300 },
    {  20, 300 },
    {  20, 290 },
    { 300, 220 },
    { 295, 218 },
    {  60, 260 },
    {   5, 220 },
    {   5,   0 },
    {   0,   0 }
  };
  const unsigned count = sizeof(outline) / sizeof(outline[0]);
  unsigned k;
  shape_t *profile;
  shape_t *solid;

  init_geometry();

  profile = new_shape();
  for (k = 0; k < count; ++k) {
    add_vertex(profile, new_vertex(outline[k][0], outline[k][1], 0));
  }

  solid = new_revolution(profile, 60);
  flushOBJ(stdout);

  free_shape(profile);
  free_shape(solid);
  finalize_geometry();
  return 0;
}
Example #5
0
/*
 * Creates three spheres and two tori, translates the two small spheres,
 * scales and rotates the tori, then calls flushOBJ(stdout).
 *
 * Every shape created here is released with free_shape() before
 * finalize_geometry(); the two small spheres were previously never freed.
 */
int main()
{
  init_geometry();
  shape_t *planet = new_sphere(50, 20, 20);
  shape_t *sp1 = new_sphere(10, 16, 16);
  shape_t *sp2 = new_sphere(10, 16, 16);
  shape_t *t1 = new_taurus(30, 4, 50, 10);
  shape_t *t2 = new_taurus(30, 4, 40, 10);
  shape_translate(sp1, 80, 70, 40);
  shape_translate(sp2, -90, 10, -60);
  shape_scale(t1, 5, 1.2, 5);
  shape_scale(t2, 3.5, 1.2, 3.5);
  /* opposite rotation angles about the (0, 0, 1) axis arguments */
  shape_rotate(t1, -20, 0, 0, 1);
  shape_rotate(t2, 20, 0, 0, 1);
  flushOBJ(stdout);
  free_shape(planet);
  free_shape(sp1);
  free_shape(sp2);
  free_shape(t1);
  free_shape(t2);
  finalize_geometry();
  return 0;
}
Example #6
0
/*
 * Driver "vp_disc_ft".
 *
 * Command line: -f <input file> (default "cvc.input"), -t <model type>,
 * -h / -? for usage().
 *
 * Steps, as far as visible here:
 *   1. read the input file, check T_global/LX/LY/LZ are non-zero;
 *   2. create 4-d FFTW plans, call init_geometry() and geometry();
 *   3. allocate disc (8*VOLUME), disc2 and work (32*VOLUME) and the FFT buffer;
 *   4. if a model function was selected with -t, fill disc2 from it for the
 *      separations outside [g_rmin, g_rmax] and Fourier transform it;
 *   5. for every gauge id from g_gaugeid to g_gaugeid2: read the contraction
 *      into disc, accumulate disc(y+x)*disc(y) into work for separations x
 *      inside [g_rmin, g_rmax], Fourier transform, normalise, add disc2,
 *      multiply by the phase exp(i*pi*(q_mu - q_nu)) and write the result
 *      with write_lime_contraction().
 *
 * Returns 0 on success; allocation / set-up failures call exit() with a
 * non-zero code.
 */
int main(int argc, char **argv) {
  
  int c, i, mu, nu;
  int filename_set = 0;
  int dims[4]      = {0,0,0,0};
  int l_LX_at, l_LXstart_at;
  int x0, x1, x2, x3, ix, iix;
  int xx0, xx1, xx2, xx3;
  int y0min, y0max, y1min, y1max, y2min, y2max, y3min, y3max;
  int y0, y1, y2, y3, iy;
  int z0, z1, z2, z3, iz;
  int gid;
  int model_type = -1;
  double *disc  = (double*)NULL;
  double *disc2 = (double*)NULL;
  double *work = (double*)NULL;
  double q[4], fnorm;
  char filename[100], contype[200];
  double ratime, retime;
  double rmin2, rmax2, rsqr;
  complex w, w1;

  fftw_complex *in=(fftw_complex*)NULL;

#ifdef MPI
  fftwnd_mpi_plan plan_p, plan_m;
#else
  fftwnd_plan plan_p, plan_m;
#endif

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "h?f:t:")) != -1) {
    switch (c) {
    case 't':
      model_type = atoi(optarg);
      break;
    case 'f':
      /* bounded copy: the argument may be longer than the buffer */
      snprintf(filename, sizeof(filename), "%s", optarg);
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }

  fprintf(stdout, "\n**************************************************\n");
  fprintf(stdout, "* vp_disc_ft\n");
  fprintf(stdout, "**************************************************\n\n");
#ifdef MPI
  if(g_cart_id==0) fprintf(stdout, "# Warning: MPI-version not yet available; exit\n");
  exit(200);
#endif


  /*********************************
   * initialize MPI parameters 
   *********************************/
  mpi_init(argc, argv);

  /* initialize fftw */
  dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ;
#ifdef MPI
  plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE);
  plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE);
  fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME);
#else
  plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE);
  plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD,  FFTW_MEASURE | FFTW_IN_PLACE);
  T            = T_global;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
#endif
  fprintf(stdout, "# [%2d] fftw parameters:\n"\
                  "# [%2d] T            = %3d\n"\
                  "# [%2d] Tstart       = %3d\n"\
                  "# [%2d] l_LX_at      = %3d\n"\
                  "# [%2d] l_LXstart_at = %3d\n"\
                  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
                  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
                  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

#ifdef MPI
  if(T==0) {
    fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id);
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
    exit(2);
  }
#endif

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(1);
  }

  geometry();

  /****************************************
   * allocate memory for the contractions
   ****************************************/
  disc  = (double*)calloc( 8*VOLUME, sizeof(double));
  if( disc == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for disc\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }

  disc2 = (double*)calloc( 32*VOLUME, sizeof(double));
  if( disc2 == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for disc2\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }
  for(ix=0; ix<32*VOLUME; ix++) disc2[ix] = 0.;


  work  = (double*)calloc(32*VOLUME, sizeof(double));
  if( work == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for work\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }

  /****************************************
   * prepare Fourier transformation arrays
   ****************************************/
  in  = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex));
  if(in==(fftw_complex*)NULL) {    
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(4);
  }

  /***************************************
   * set model type function
   ***************************************/
  switch (model_type) {
    case 0:
      model_type_function = pidisc_model;
      fprintf(stdout, "# function pointer set to type pidisc_model\n");
      break;  /* was missing: case 0 used to fall through and select pidisc_model1 */
    case 1:
      model_type_function = pidisc_model1;
      fprintf(stdout, "# function pointer set to type pidisc_model1\n");
      break;
    case 2:
      model_type_function = pidisc_model2;
      fprintf(stdout, "# function pointer set to type pidisc_model2\n");
      break;
    case 3:
      model_type_function = pidisc_model3;
      fprintf(stdout, "# function pointer set to type pidisc_model3\n");
      break;
    default:
      model_type_function = NULL;
      fprintf(stdout, "# no model function selected; will add zero\n");
      break;
  }

  /****************************************
   * prepare the model for pidisc
   * - same for all gauge configurations
   ****************************************/
  rmin2 = g_rmin * g_rmin;
  rmax2 = g_rmax * g_rmax;
  /* test the pointer rather than model_type: an unknown type > 3 leaves it NULL */
  if(model_type_function != NULL) {
    for(mu=0; mu<16; mu++) {
      model_type_function(model_mrho, model_dcoeff_re, model_dcoeff_im, work, plan_m, mu);
      for(x0=-(T-1);  x0<T;  x0++) {
        y0 = (x0 + T_global) % T_global;
      for(x1=-(LX-1); x1<LX; x1++) {
        y1 = (x1 + LX) % LX;
      for(x2=-(LY-1); x2<LY; x2++) {
        y2 = (x2 + LY) % LY;
      for(x3=-(LZ-1); x3<LZ; x3++) {
        y3 = (x3 + LZ) % LZ;
        iy = g_ipt[y0][y1][y2][y3];
        rsqr = (double)(x1*x1) + (double)(x2*x2) + (double)(x3*x3);
        if(rmin2-rsqr<=_Q2EPS && rsqr-rmax2<=_Q2EPS) continue; /* radius in range for data usage, so continue */
        disc2[_GWI(mu,iy,VOLUME)  ] += work[2*iy  ];
        disc2[_GWI(mu,iy,VOLUME)+1] += work[2*iy+1];
      }}}}
      memcpy((void*)in, (void*)(disc2+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_p, in, NULL);
#endif
      memcpy((void*)(disc2+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));
    }
  } else {
    for(ix=0; ix<32*VOLUME; ix++) disc2[ix] = 0.; 
  }
  
  /***********************************************
   * start loop on gauge id.s 
   ***********************************************/
  for(gid=g_gaugeid; gid<=g_gaugeid2; gid+=g_gauge_step) {

    if(g_cart_id==0) fprintf(stdout, "# Start working on gauge id %d\n", gid);

    /* read the new contractions */
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    snprintf(filename, sizeof(filename), "%s.%.4d.%.4d", filename_prefix, gid, Nsave);
    if(g_cart_id==0) fprintf(stdout, "# Reading contraction data from file %s\n", filename);
    if(read_lime_contraction(disc, filename, 4, 0) == 106) {
      if(g_cart_id==0) fprintf(stderr, "Error, could not read from file %s, continue\n", filename);
      continue;
    }
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "# time to read contraction: %e seconds\n", retime-ratime);

    /************************************************
     * prepare \Pi_\mu\nu (x,y)
     ************************************************/
#  ifdef MPI
    ratime = MPI_Wtime();
#  else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#  endif
    /* work is accumulated with += below, so it must start from zero for every
     * gauge id; otherwise it still holds the model output or the previous
     * configuration's result */
    memset(work, 0, 32*VOLUME*sizeof(double));

    for(x0=-T+1; x0<T; x0++) {
      y0min = x0<0 ? -x0 : 0;
      y0max = x0<0 ? T   : T-x0;
    for(x1=-LX+1; x1<LX; x1++) {
      y1min = x1<0 ? -x1 : 0;
      y1max = x1<0 ? LX  : LX-x1;
    for(x2=-LY+1; x2<LY; x2++) {
      y2min = x2<0 ? -x2 : 0;
      y2max = x2<0 ? LY  : LY-x2;
    for(x3=-LZ+1; x3<LZ; x3++) {
      y3min = x3<0 ? -x3 : 0;
      y3max = x3<0 ? LZ  : LZ-x3;
      /* wrap the separation into [0, extent) using the extent of its own direction */
      xx0 = (x0+T ) % T;
      xx1 = (x1+LX) % LX;
      xx2 = (x2+LY) % LY;
      xx3 = (x3+LZ) % LZ;
      ix = g_ipt[xx0][xx1][xx2][xx3];

      /* only separations with spatial radius inside [g_rmin, g_rmax] use data */
      rsqr = (double)(x1*x1) + (double)(x2*x2) + (double)(x3*x3);
      if(rmin2-rsqr>_Q2EPS || rsqr-rmax2>_Q2EPS) continue;
      
      for(y0=y0min; y0<y0max; y0++) {
        z0 = y0 + x0;
      for(y1=y1min; y1<y1max; y1++) {
        z1 = y1 + x1;
      for(y2=y2min; y2<y2max; y2++) {
        z2 = y2 + x2;
      for(y3=y3min; y3<y3max; y3++) {
        z3 = y3 + x3;
        iy = g_ipt[y0][y1][y2][y3];
        iz = g_ipt[z0][z1][z2][z3];

        /* i runs over the 16 (mu,nu) combinations, i = 4*mu + nu */
        i=0;
        for(mu=0; mu<4; mu++) {
        for(nu=0; nu<4; nu++) {
          iix = _GWI(i,ix,VOLUME);
          _co_eq_co_ti_co(&w, (complex*)(disc+_GWI(mu,iz,VOLUME)), (complex*)(disc+_GWI(nu,iy,VOLUME)));
          work[iix  ] += w.re;
          work[iix+1] += w.im;
          i++;
        }}
      }}}}
    }}}}
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "# time to calculate \\Pi_\\mu\\nu in position space: %e seconds\n", retime-ratime);

    /***********************************************
     * Fourier transform
     ***********************************************/
    for(mu=0; mu<16; mu++) {
      memcpy((void*)in, (void*)(work+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_p, in, NULL);
#endif      
      memcpy((void*)(work+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));
    }


    fnorm = 1. / ((double)T_global * (double)(LX*LY*LZ));
    if(g_cart_id==0) fprintf(stdout, "# P-fnorm = %16.5e\n", fnorm);
    for(x0=0; x0<T; x0++) {
      q[0] = (double)(x0+Tstart) / (double)T_global;
    for(x1=0; x1<LX; x1++) {
      q[1] = (double)x1 / (double)LX;
    for(x2=0; x2<LY; x2++) {
      q[2] = (double)x2 / (double)LY;
    for(x3=0; x3<LZ; x3++) {
      q[3] = (double)x3 / (double)LZ;
      ix = g_ipt[x0][x1][x2][x3];
      i=0;
      for(mu=0; mu<4; mu++) {
      for(nu=0; nu<4; nu++) {
        iix = _GWI(i,ix,VOLUME);
        /* phase factor exp( i * pi * (q_mu - q_nu) ) */
        w.re = cos(M_PI * (q[mu] - q[nu]));
        w.im = sin(M_PI * (q[mu] - q[nu]));
        /* normalise the data part and add the model part before applying the phase */
        work[iix  ] = work[iix  ] * fnorm + disc2[iix  ];
        work[iix+1] = work[iix+1] * fnorm + disc2[iix+1];
        _co_eq_co_ti_co(&w1, (complex*)(work+iix), &w);
        work[iix  ] = w1.re;
        work[iix+1] = w1.im;
        i++;
      }}
    }}}}

    /***********************************************
     * save results
     ***********************************************/
    snprintf(filename, sizeof(filename), "%s.%.4d.%.4d", filename_prefix2, gid, Nsave);
    if(g_cart_id==0) fprintf(stdout, "# Saving results to file %s\n", filename);
    sprintf(contype, "cvc-disc-P");
    write_lime_contraction(work, filename, 64, 16, contype, gid, Nsave);

/*
    sprintf(filename, "%sascii.%.4d.%.4d", filename_prefix2, gid, Nsave);
    write_contraction(work, NULL, filename, 16, 2, 0);
*/

    if(g_cart_id==0) fprintf(stdout, "# Finished working on gauge id %d\n", gid);
  }  /* of loop on gid */

  /***********************************************
   * free the allocated memory, finalize 
   ***********************************************/
  free_geometry();
  free(in);   /* allocated with malloc above, so release with free */
  free(disc);
  free(disc2);
  free(work);

#ifdef MPI
  fftwnd_mpi_destroy_plan(plan_p);
  fftwnd_mpi_destroy_plan(plan_m);
  MPI_Finalize();
#else
  fftwnd_destroy_plan(plan_p);
  fftwnd_destroy_plan(plan_m);
#endif

  return(0);

}
Example #7
0
int main(int argc, char **argv) {
  
  int c, mu, status;
  int filename_set = 0;
  int mode = 0;
  int l_LX_at, l_LXstart_at;
  int x0, x1, x2, x3, ix, iix, iiy, gid, iclass;
  int Thp1, nclass;
  int *picount;
  double *conn = (double*)NULL;
  double *conn2 = (double*)NULL;
  double q[4], qsqr;
  int verbose = 0;
  char filename[800];
  double ratime, retime;

  int *qid=NULL, *qcount=NULL, **qrep=NULL, **qmap=NULL;
  double **qlist=NULL, qmax=0.; 
  int VOL3;

  FILE *ofs;
  fftw_complex *corrt=NULL;

  fftw_complex *pi00=(fftw_complex*)NULL, *pijj=(fftw_complex*)NULL, *piavg=(fftw_complex*)NULL;

  fftw_plan plan_m;

  while ((c = getopt(argc, argv, "h?vf:m:q:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'm':
      mode = atoi(optarg);
      break;
    case 'q':
      qmax = atof(optarg);
      fprintf(stdout, "\n# [] qmax set to %e\n", qmax);
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }

  /* initialize MPI parameters */
  mpi_init(argc, argv);

  /* initialize fftw, create plan with FFTW_FORWARD ---  in contrast to
   * FFTW_BACKWARD in e.g. avc_exact */
  plan_m = fftw_create_plan(T_global, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE);
  if(plan_m==NULL) {
    fprintf(stderr, "Error, could not create fftw plan\n");
    return(1);
  }

  T            = T_global;
  Thp1         = T/2 + 1;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
  fprintf(stdout, "# [%2d] fftw parameters:\n"\
                  "# [%2d] T            = %3d\n"\
		  "# [%2d] Tstart       = %3d\n"\
		  "# [%2d] l_LX_at      = %3d\n"\
		  "# [%2d] l_LXstart_at = %3d\n"\
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
    exit(1);
  }

  geometry();

  VOL3 = LX*LY*LZ;

  status = make_qlatt_orbits_3d_parity_avg(&qid, &qcount, &qlist, &nclass, &qrep, &qmap);
  if(status != 0) {
    fprintf(stderr, "\n[] Error while creating h4-lists\n");
    exit(4);
  }
  fprintf(stdout, "# [] number of classes = %d\n", nclass);
//  exit(255);

  /****************************************
   * allocate memory for the contractions *
   ****************************************/
  conn = (double*)calloc(32*VOLUME, sizeof(double));
  if( (conn==(double*)NULL) ) {
    fprintf(stderr, "could not allocate memory for contr. fields\n");
    exit(3);
  }

/*
  conn2 = (double*)calloc(32*VOLUME, sizeof(double));
  if( (conn2==(double*)NULL) ) {
    fprintf(stderr, "could not allocate memory for contr. fields\n");
    exit(4);
  }

  pi00 = (fftw_complex*)malloc(VOLUME*sizeof(fftw_complex));
  if( (pi00==(fftw_complex*)NULL) ) {
    fprintf(stderr, "could not allocate memory for pi00\n");
    exit(2);
  }

  pijj = (fftw_complex*)fftw_malloc(VOLUME*sizeof(fftw_complex));
  if( (pijj==(fftw_complex*)NULL) ) {
    fprintf(stderr, "could not allocate memory for pijj\n");
    exit(2);
  }
*/
  corrt = fftw_malloc(T*sizeof(fftw_complex));
  if(corrt == NULL) {
    fprintf(stderr, "\nError, could not alloc corrt\n");
    exit(3);
  }

  for(gid=g_gaugeid; gid<=g_gaugeid2; gid+=g_gauge_step) {

//    for(ix=0; ix<VOLUME; ix++) {pi00[ix].re = 0.; pi00[ix].im = 0.;}
//    for(ix=0; ix<VOLUME; ix++) {pijj[ix].re = 0.; pijj[ix].im = 0.;}
    /***********************
     * read contractions   *
     ***********************/
    ratime = (double)clock() / CLOCKS_PER_SEC;

    sprintf(filename, "%s.%.4d", filename_prefix, gid);
    fprintf(stdout, "# Reading data from file %s\n", filename);
    if(format==2) {
      status = read_contraction(conn, NULL, filename, 16);
    } else {
      status = read_lime_contraction(conn, filename, 16, 0);
    }
    if(status != 0) {
      fprintf(stderr, "Error: could not read from file %s; status was %d\n", filename, status);
      continue;
    }
/*
    sprintf(filename, "%s.%.4d.%.4d", filename_prefix2, gid);
    fprintf(stdout, "# Reading data from file %s\n", filename);
    status = read_lime_contraction(conn2, filename, 16, 0);
    if(status == 106) {
      fprintf(stderr, "Error: could not read from file %s; status was %d\n", filename, status);
      continue;
    }
*/
    retime = (double)clock() / CLOCKS_PER_SEC;
    fprintf(stdout, "# time to read contractions %e seconds\n", retime-ratime);

    /***********************
     * fill the correlator *
     ***********************/
    ratime = (double)clock() / CLOCKS_PER_SEC;
/*
    for(x1=0; x1<LX; x1++) {
    for(x2=0; x2<LY; x2++) {
    for(x3=0; x3<LZ; x3++) {
      for(x0=0; x0<T; x0++) {
        iix = g_ipt[0][x1][x2][x3]*T+x0;
        for(mu=1; mu<4; mu++) {
          ix = _GWI(5*mu,g_ipt[x0][x1][x2][x3],VOLUME);
          pijj[iix].re += ( conn[ix  ] - conn2[ix  ] ) * (double)Nsave / (double)(Nsave-1);
          pijj[iix].im += ( conn[ix+1] - conn2[ix+1] ) * (double)Nsave / (double)(Nsave-1);
        }
        ix = 2*g_ipt[x0][x1][x2][x3];
        pi00[iix].re += ( conn[ix  ] - conn2[ix  ] ) * (double)Nsave / (double)(Nsave-1);
        pi00[iix].im += ( conn[ix+1] - conn2[ix+1] ) * (double)Nsave / (double)(Nsave-1);
      }
    }}}
*/

    for(iclass=0;iclass<nclass;iclass++) {
      if(qlist[iclass][0] >= qmax) {
//        fprintf(stdout, "\n# [] will skip class %d, momentum squared = %f is too large\n", iclass, qlist[iclass][0]);
        continue;
//      } else {
//        fprintf(stdout, "\n# [] processing class %d, momentum squared = %f\n", iclass, qlist[iclass][0]);
      }

      for(x0=0; x0<T; x0++) {
        corrt[x0].re = 0.;
        corrt[x0].im = 0.;
      }

/* 
      for(x1=0;x1<VOL3;x1++) {
        if(qid[x1]==iclass) {
          fprintf(stdout, "# using mom %d ---> (%d, %d, %d)\n", x1, qrep[iclass][1], qrep[iclass][2], qrep[iclass][3]);
          for(x0=0; x0<T; x0++) {
            ix = x0*VOL3 + x1;
            corrt[x0].re += conn[_GWI(5,ix,VOLUME)  ] + conn[_GWI(10,ix,VOLUME)  ] + conn[_GWI(15,ix,VOLUME)  ];
            corrt[x0].im += conn[_GWI(5,ix,VOLUME)+1] + conn[_GWI(10,ix,VOLUME)+1] + conn[_GWI(15,ix,VOLUME)+1];
          }
        }
      }
*/
      for(x0=0; x0<T; x0++) {
        for(x1=0;x1<qcount[iclass];x1++) {
          x2 = qmap[iclass][x1];
          // if(x0==0) fprintf(stdout, "# using mom %d ---> (%d, %d, %d)\n", x2, qrep[iclass][1], qrep[iclass][2], qrep[iclass][3]);
            ix = x0*VOL3 + x2;
            corrt[x0].re += conn[_GWI(5,ix,VOLUME)  ] + conn[_GWI(10,ix,VOLUME)  ] + conn[_GWI(15,ix,VOLUME)  ];
            corrt[x0].im += conn[_GWI(5,ix,VOLUME)+1] + conn[_GWI(10,ix,VOLUME)+1] + conn[_GWI(15,ix,VOLUME)+1];
        }
      }
      // fprintf(stdout, "\n\n# ------------------------------\n");

      for(x0=0; x0<T; x0++) {
        corrt[x0].re /= (double)T * qcount[iclass];
        corrt[x0].im /= (double)T * qcount[iclass];
      }
/*      fftw(plan_m, 1, corrt, 1, T, (fftw_complex*)NULL, 0, 0); */
      fftw_one(plan_m, corrt, NULL);
      sprintf(filename, "rho.%.4d.x%.2dy%.2dz%.2d", gid, qrep[iclass][1], qrep[iclass][2], qrep[iclass][3]);
      if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
        fprintf(stderr, "Error: could not open file %s for writing\n", filename);
        exit(5);
      }
      fprintf(stdout, "# writing VKVK data to file %s\n", filename);
      fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f%21.12f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu, qlist[iclass][0]);
    
      fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, 0, corrt[0].re, 0., gid);
      for(x0=1; x0<(T/2); x0++) {
        fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, x0, 
          corrt[x0].re, corrt[T-x0].re, gid);
      }
      fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, (T/2), corrt[T/2].re, 0., gid);
      fflush(ofs);
      fclose(ofs);

      retime = (double)clock() / CLOCKS_PER_SEC;
      fprintf(stdout, "# time to fill correlator %e seconds\n", retime-ratime);

    }  // of loop on classes

  }  // end of loop on gauge id

  /***************************************
   * free the allocated memory, finalize *
   ***************************************/
  if(corrt != NULL) free(corrt);
  free_geometry();

  if(pi00 != NULL) free(pi00);
  if(pijj != NULL) free(pijj);

  fftw_destroy_plan(plan_m);

  finalize_q_orbits(&qid, &qcount, &qlist, &qrep);
  if(qmap != NULL) {
    free(qmap[0]);
    free(qmap);
  }

  if(g_cart_id == 0) {
    g_the_time = time(NULL);
    fprintf(stdout, "\n# [] %s# [] end of run\n", ctime(&g_the_time));
    fprintf(stderr, "\n# [] %s# [] end of run\n", ctime(&g_the_time));
  }

  return(0);

}
Example #8
0
/*
 * Driver "get_corr_v2": writes "v0v0_corr.<gid>" and "v<mu>v<mu>_corr.<gid>"
 * (mu = 1..3) for every gauge id from g_gaugeid to g_gaugeid2.
 *
 * Command line: -f <input file> (default "cvc.input"), -v (parsed but not
 * used further in this function), -h / -? for usage().
 *
 * For each direction mu the diagonal components 5*imu of conn, for the three
 * directions imu != mu, are summed along the mu axis with all other
 * coordinates set to 0.  The resulting 1-d arrays are transformed with the
 * FFTW_FORWARD plans, divided by their length and written to file.
 *
 * NOTE(review): directions 1..3 all use LX as extent and conn2 is laid out
 * with stride T, so this appears to assume LX == LY == LZ and LX <= T --
 * confirm against the intended lattices.
 *
 * Returns 0 on success; failures go through EXIT() with a non-zero code.
 */
int main(int argc, char **argv) {
  
  int c, mu, nu, status, gid;
  int filename_set = 0;
  int l_LX_at, l_LXstart_at;
  int x0, ix;
  int tsize = 0;
  double *conn  = NULL;
  double *conn2 = (double*)NULL;
  int verbose = 0;
  char filename[800];
  double ratime, retime;
  FILE *ofs;
  int ivec[4], idx[4], imu;

  fftw_complex *inT=NULL, *outT=NULL, *inL=NULL, *outL=NULL;

  fftw_plan plan_m_T, plan_m_L;

  while ((c = getopt(argc, argv, "h?vf:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'f':
      /* bounded copy: the argument may be longer than the buffer */
      snprintf(filename, sizeof(filename), "%s", optarg);
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  // set the default values
  set_default_input_values();
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# [get_corr_v2] reading input parameters from file %s\n", filename);
  read_input_parser(filename);

  // some checks on the input data
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    fprintf(stdout, "# [get_corr_v2] T=%d, LX=%d, LY=%d, LZ=%d\n", T_global, LX, LY, LZ);
    if(g_proc_id==0) fprintf(stderr, "[get_corr_v2] Error, T and L's must be set\n");
    usage();
  }

  // initialize MPI parameters
  mpi_init(argc, argv);

  /* initialize fftw, create plan with FFTW_FORWARD ---  in contrast to
   * FFTW_BACKWARD in e.g. avc_exact */
  plan_m_T = fftw_create_plan(T_global, FFTW_FORWARD, FFTW_MEASURE);
  plan_m_L = fftw_create_plan(LX, FFTW_FORWARD, FFTW_MEASURE);
  T            = T_global;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
  fprintf(stdout, "# [%2d] fftw parameters:\n"\
                  "# [%2d] T            = %3d\n"\
                  "# [%2d] Tstart       = %3d\n"\
                  "# [%2d] l_LX_at      = %3d\n"\
                  "# [%2d] l_LXstart_at = %3d\n"\
                  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
                  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
                  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

  if(init_geometry() != 0) {
    fprintf(stderr, "[get_corr_v2] Error from init_geometry\n");
    EXIT(1);
  }

  geometry();

  /****************************************
   * allocate memory for the contractions *
   ****************************************/
  conn = (double*)calloc(32 * VOLUME, sizeof(double));
  if( (conn==NULL) ) {
    fprintf(stderr, "[get_corr_v2] Error, could not allocate memory for contr. fields\n");
    EXIT(2);
  }

  /* 4 directions x T complex entries */
  conn2= (double*)calloc(8 * T, sizeof(double));
  if( (conn2==NULL) ) {
    fprintf(stderr, "[get_corr_v2] Error, could not allocate memory for corr.\n");
    EXIT(3);
  }

  /*****************************************
   * prepare Fourier transformation arrays * 
   *****************************************/
  inT   = (fftw_complex*)malloc(T  * sizeof(fftw_complex));
  inL   = (fftw_complex*)malloc(LX * sizeof(fftw_complex));
  outT  = (fftw_complex*)malloc(T  * sizeof(fftw_complex));
  outL  = (fftw_complex*)malloc(LX * sizeof(fftw_complex));
  if( inT==NULL || inL==NULL || outT==NULL || outL==NULL ) {
    fprintf(stderr, "[get_corr_v2] Error, could not allocate fftw fields\n");
    EXIT(4);
  }

  for(gid=g_gaugeid; gid<=g_gaugeid2; gid+=g_gauge_step) {
    memset(conn, 0, 32*VOLUME*sizeof(double));
    memset(conn2, 0, 8*T*sizeof(double));
    /***********************
     * read contractions   *
     ***********************/
    ratime = (double)clock() / CLOCKS_PER_SEC;
    snprintf(filename, sizeof(filename), "%s.%.4d", filename_prefix, gid);
    if(format==2 || format==3) {
      status = read_contraction(conn, NULL, filename, 16);
    } else if( format==0) {
      status = read_lime_contraction(conn, filename, 16, 0);
    } else {
      /* no reader for this format: do not leave status uninitialised */
      fprintf(stderr, "[get_corr_v2] Error, unrecognized contraction file format\n");
      status = -1;
    }
    if(status != 0) {
      fprintf(stderr, "[get_corr_v2] Warning, could not read contractions for gid %d, status was %d\n", gid, status);
      continue;
    }
    retime = (double)clock() / CLOCKS_PER_SEC;
    fprintf(stdout, "# [get_corr_v2] time to read contractions %e seconds\n", retime-ratime);
  
    /***********************
     * fill the correlator *
     ***********************/
    ratime = (double)clock() / CLOCKS_PER_SEC;
    for(mu=0; mu<4; mu++) {
      /* ivec is the cyclic permutation of (0,1,2,3) starting at mu:
       * ivec[0] is the running direction, the other three are pinned to 0 */
      ivec[0] = (0 + mu)%4;
      ivec[1] = (1 + mu)%4;
      ivec[2] = (2 + mu)%4;
      ivec[3] = (3 + mu)%4;
      idx[ivec[1]] = 0;
      idx[ivec[2]] = 0;
      idx[ivec[3]] = 0;
      tsize = (mu==0) ? T : LX;
      for(x0=0; x0<tsize; x0++) {
        idx[ivec[0]] = x0;
        /* sum the diagonal components of the three directions other than mu */
        for(nu=1; nu<4; nu++) {
          imu = (mu+nu) % 4;
          ix = _GWI(5*imu, g_ipt[idx[0]][idx[1]][idx[2]][idx[3]], VOLUME);
          conn2[2*(mu*T+x0)  ] += conn[ix  ];
          conn2[2*(mu*T+x0)+1] += conn[ix+1];
        }
      }
    }
    retime = (double)clock() / CLOCKS_PER_SEC;
    fprintf(stdout, "# [get_corr_v2] time to fill correlator %e seconds\n", retime-ratime);
   
    /*****************************************
     * reverse Fourier transformation
     *****************************************/
    ratime = (double)clock() / CLOCKS_PER_SEC;
    memcpy((void*)inT, (void*)conn2, 2*T*sizeof(double));
    fftw_one(plan_m_T, inT, outT);
    for(ix=0; ix<T; ix++) {
      conn2[2*ix  ] = outT[ix].re / (double)T;
      conn2[2*ix+1] = outT[ix].im / (double)T;
    }
    for(mu=1; mu<4; mu++) {
      memcpy((void*)inL, (void*)(conn2+2*mu*T), 2*LX*sizeof(double));
      fftw_one(plan_m_L, inL, outL);
      for(ix=0; ix<LX; ix++) {
        conn2[2*(mu*T+ix)  ] = outL[ix].re / (double)LX;
        conn2[2*(mu*T+ix)+1] = outL[ix].im / (double)LX;
      }
    }
    retime = (double)clock() / CLOCKS_PER_SEC;
    fprintf(stdout, "# [get_corr_v2] time for Fourier transform %e seconds\n", retime-ratime);
  
    ratime = (double)clock() / CLOCKS_PER_SEC;
    snprintf(filename, sizeof(filename), "v0v0_corr.%.4d", gid);
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "[get_corr_v2] Error, could not open file %s for writing\n", filename);
      EXIT(6);
    }
    /* rows pair entry x0 with entry T-x0; x0=0 and x0=T/2 have no partner */
    x0 = 0;
    fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], 0., gid);
    for(x0=1; x0<T/2; x0++) {
      fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], conn2[2*(T-x0)], gid);
    }
    x0 = T / 2;
    fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], 0., gid);
    fclose(ofs);
  
    for(mu=1; mu<4; mu++) {
      snprintf(filename, sizeof(filename), "v%dv%d_corr.%.4d", mu, mu, gid);
      if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
        fprintf(stderr, "[get_corr_v2] Error, could not open file %s for writing\n", filename);
        EXIT(7);
      }
      x0 = 0;
      fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*(mu*T+x0)], 0., gid);
      for(x0=1; x0<LX/2; x0++) {
        fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*(mu*T+x0)], conn2[2*(mu*T+ LX-x0)], gid);
      }
      x0 = LX / 2;
      fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*(mu*T+x0)], 0., gid);
      fclose(ofs);
    }
    retime = (double)clock() / CLOCKS_PER_SEC;
    fprintf(stdout, "# [get_corr_v2] time to write correlator %e seconds\n", retime-ratime);
  }  // of loop on gid

  /***************************************
   * free the allocated memory, finalize *
   ***************************************/
  free_geometry();
  /* the FFT buffers were allocated with malloc, so release them with free */
  free(inT);
  free(outT);
  free(inL);
  free(outL);
  free(conn);
  free(conn2);
  fftw_destroy_plan(plan_m_T);
  fftw_destroy_plan(plan_m_L);

  /* take the time stamp here so the message reports the end of the run */
  g_the_time = time(NULL);
  fprintf(stdout, "# [get_corr_v2] %s# [get_corr_v2] end of run\n", ctime(&g_the_time));
  fflush(stdout);
  fprintf(stderr, "[get_corr_v2] %s[get_corr_v2] end of run\n", ctime(&g_the_time));
  fflush(stderr);

  return(0);

}
Example #9
0
int main(int argc, char **argv) {
  
  int c, mu;
  int filename_set = 0;
  int l_LX_at, l_LXstart_at;
  int source_location, have_source_flag = 0;
  int x0, ix;
  int sx0, sx1, sx2, sx3;
  int check_WI=0;
  double *conn  = (double*)NULL;
  double *conn2 = (double*)NULL;
  int verbose = 0;
  char filename[800];
  double ratime, retime;
  FILE *ofs;
/**************************
 * variables for WI check */
  int x1, x2, x3, nu;
  double wre, wim, q[4];
/**************************/

  fftw_complex *in=(fftw_complex*)NULL, *out=(fftw_complex*)NULL;

  fftw_plan plan_m;

  while ((c = getopt(argc, argv, "wh?vf:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'w':
      check_WI = 1;
      fprintf(stdout, "# [get_rho_corr] check WI in momentum space\n");
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  set_default_input_values();
  if(filename_set==0) strcpy(filename, "cvc.input");

  // set the default values
  set_default_input_values();
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# [get_rho_corr] reading input parameters from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n");
    usage();
  }

  /* initialize MPI parameters */
  mpi_init(argc, argv);

  /* initialize fftw, create plan with FFTW_FORWARD ---  in contrast to
   * FFTW_BACKWARD in e.g. avc_exact */
  plan_m = fftw_create_plan(T_global, FFTW_FORWARD, FFTW_MEASURE);
  T            = T_global;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
  fprintf(stdout, "# [%2d] fftw parameters:\n"\
                  "# [%2d] T            = %3d\n"\
		  "# [%2d] Tstart       = %3d\n"\
		  "# [%2d] l_LX_at      = %3d\n"\
		  "# [%2d] l_LXstart_at = %3d\n"\
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
    exit(1);
  }

  geometry();

  /****************************************
   * allocate memory for the contractions *
   ****************************************/
  conn = (double*)calloc(2 * 16 * VOLUME, sizeof(double));
  if( (conn==(double*)NULL) ) {
    fprintf(stderr, "could not allocate memory for contr. fields\n");
    exit(3);
  }
  for(ix=0; ix<32*VOLUME; ix++) conn[ix] = 0.;

  conn2= (double*)calloc(2 * T, sizeof(double));
  if( (conn2==(double*)NULL) ) {
    fprintf(stderr, "could not allocate memory for corr.\n");
    exit(2);
  }
  for(ix=0; ix<2*T; ix++) conn2[ix] = 0.;

  /*****************************************
   * prepare Fourier transformation arrays * 
   *****************************************/
  in   = (fftw_complex*)malloc(T*sizeof(fftw_complex));
  out  = (fftw_complex*)malloc(T*sizeof(fftw_complex));
  if( (in==(fftw_complex*)NULL) || (out==(fftw_complex*)NULL) ) exit(4);

  /********************************
   * determine source coordinates *
   ********************************/
  have_source_flag = (int)(g_source_location/(LX*LY*LZ)>=Tstart && g_source_location/(LX*LY*LZ)<(Tstart+T));
  if(have_source_flag==1) fprintf(stdout, "process %2d has source location\n", g_cart_id);
  sx0 = g_source_location/(LX*LY*LZ)-Tstart;
  sx1 = (g_source_location%(LX*LY*LZ)) / (LY*LZ);
  sx2 = (g_source_location%(LY*LZ)) / LZ;
  sx3 = (g_source_location%LZ);
  if(have_source_flag==1) { 
    fprintf(stdout, "local source coordinates: (%3d,%3d,%3d,%3d)\n", sx0, sx1, sx2, sx3);
    source_location = g_ipt[sx0][sx1][sx2][sx3];
  }
  have_source_flag = 0;

  /***********************
   * read contractions   *
   ***********************/
  ratime = (double)clock() / CLOCKS_PER_SEC;
  // read_contraction(conn, (int*)NULL, filename_prefix, 16);
  read_lime_contraction(conn, filename_prefix, 16, 0);

  retime = (double)clock() / CLOCKS_PER_SEC;
  fprintf(stdout, "time to read contractions %e seconds\n", retime-ratime);

  // TEST Ward Identity
  if(check_WI) {
    fprintf(stdout, "# [get_corr_v5] Ward identity\n");
    sprintf(filename, "WI.%.4d", Nconf);
    ofs = fopen(filename, "w");
    if(ofs == NULL) exit(32);
    for(x0=0; x0<T; x0++) {
      q[0] = 2. * sin(M_PI * (double)x0 / (double)T);
    for(x1=0; x1<LX; x1++) {
      q[1] = 2. * sin(M_PI * (double)x1 / (double)LX);
    for(x2=0; x2<LY; x2++) {
      q[2] = 2. * sin(M_PI * (double)x2 / (double)LY);
    for(x3=0; x3<LZ; x3++) {
      q[3] = 2. * sin(M_PI * (double)x3 / (double)LZ);
      ix = g_ipt[x0][x1][x2][x3];
      for(nu=0;nu<4;nu++) {
        wre =   q[0] * conn[_GWI(4*0+nu,ix,VOLUME)] + q[1] * conn[_GWI(4*1+nu,ix,VOLUME)] \
              + q[2] * conn[_GWI(4*2+nu,ix,VOLUME)] + q[3] * conn[_GWI(4*3+nu,ix,VOLUME)];
        wim =   q[0] * conn[_GWI(4*0+nu,ix,VOLUME)+1] + q[1] * conn[_GWI(4*1+nu,ix,VOLUME)+1] \
              + q[2] * conn[_GWI(4*2+nu,ix,VOLUME)+1] + q[3] * conn[_GWI(4*3+nu,ix,VOLUME)+1];
        fprintf(ofs, "\t%3d%3d%3d%3d%3d%16.7e%16.7e\n", nu, x0, x1, x2, x3, wre, wim);
      }
    }}}}
    fclose(ofs);
  }

  /***********************
   * fill the correlator *
   ***********************/
  ratime = (double)clock() / CLOCKS_PER_SEC;
  for(x0=0; x0<T; x0++) {
    for(mu=1; mu<4; mu++) {
      ix = get_indexf(x0,0,0,0,mu,mu);
      fprintf(stdout, "x0=%3d, mu=%3d\tix=%8d\n", x0, mu, ix);
      conn2[2*x0  ] += conn[ix  ];
      conn2[2*x0+1] += conn[ix+1];
    }
  }
  retime = (double)clock() / CLOCKS_PER_SEC;
  fprintf(stdout, "time to fill correlator %e seconds\n", retime-ratime);
 
  /********************************
   * test: print correl to stdout *
   ********************************/
  for(x0=0; x0<T; x0++) {
    fprintf(stdout, "%3d%25.16e%25.16e\n", x0, conn2[2*x0], conn[2*x0+1]);
  }

  /*****************************************
   * do the reverse Fourier transformation *
   *****************************************/
  ratime = (double)clock() / CLOCKS_PER_SEC;
  memcpy((void*)in, (void*)conn2, 2*T*sizeof(double));
  fftw_one(plan_m, in, out);
  for(ix=0; ix<T; ix++) {
    conn2[2*ix  ] = out[ix].re / (double)T;
    conn2[2*ix+1] = out[ix].im / (double)T;
  }
  retime = (double)clock() / CLOCKS_PER_SEC;
  fprintf(stdout, "time for Fourier transform %e seconds\n", retime-ratime);

  
  ratime = (double)clock() / CLOCKS_PER_SEC;
  sprintf(filename, "rho_corr.%.4d", Nconf);
  if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
    fprintf(stderr, "could not open file %s for writing\n", filename);
    exit(5);
  }
  //for(x0=0; x0<T; x0++) {
  //  fprintf(ofs, "%3d%25.16e%25.16e\n", x0, conn2[2*x0], conn2[2*x0+1]);
  //}

  x0 = 0;
  fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], 0., Nconf);
  for(x0=1; x0<T/2; x0++) {
    fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], conn2[2*(T-x0)], Nconf);
  }
  x0 = T/2;
  fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], 0., Nconf);

  fclose(ofs);
  retime = (double)clock() / CLOCKS_PER_SEC;
  fprintf(stdout, "time to write correlator %e seconds\n", retime-ratime);
  

  /***************************************
   * free the allocated memory, finalize *
   ***************************************/
  free_geometry();
  fftw_free(in);
  fftw_free(out);
  free(conn);
  free(conn2);
  fftw_destroy_plan(plan_m);

  return(0);

}
Example #10
0
File: pi_ud_tp0.c Project: etmc/cvc
/*
 * Entry point of pi_ud_tp0.
 *
 * For every gauge id in [g_gaugeid, g_gaugeid2] (step g_gauge_step) and every
 * source id in [g_sourceid, g_sourceid2] (step g_sourceid_step) this reads the
 * 4-component field jc_ud_x.<gid>.<sid>, sums components 1..3 over each time
 * slice, and accumulates
 *   - work: the running sum of the per-source time series,
 *   - bias: the running sum of per-source products of the series with itself.
 * Once `count` sources have been processed and count == Nsave, the same
 * product built from `work` minus `bias` is written to pi_ud_t.<gid>.<count>.
 *
 * Returns 0 on success; error paths leave through the EXIT macro.
 */
int main(int argc, char **argv) {
  
  int c;
  int count        = 0;
  int filename_set = 0;
  int ix, iix;
  int sid, status, gid, it, ir, it2;
  double *disc = (double*)NULL;
  double *work = (double*)NULL;
  double *bias = (double*)NULL;
  int verbose = 0;   /* set by -v; not consulted anywhere in this function */
  unsigned int VOL3;
  char filename[100];
  double ratime, retime;
  double *tmp = NULL;
  complex w;
  FILE *ofs = NULL;

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "h?vf:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'f':
      /* filename is a fixed-size buffer; refuse arguments that do not fit */
      if(strlen(optarg) >= sizeof(filename)) {
        fprintf(stderr, "[pi_ud_tp0] Error, input filename too long\n");
        EXIT(9);
      }
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  g_the_time = time(NULL);
  fprintf(stdout, "# [pi_ud_tp0] using global time stamp %s", ctime(&g_the_time));


  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }

  fprintf(stdout, "# [pi_ud_tp0] **************************************************\n");
  fprintf(stdout, "# [pi_ud_tp0] pi_ud_p\n");
  fprintf(stdout, "# [pi_ud_tp0] **************************************************\n\n");

  /*********************************
   * initialize MPI parameters 
   *********************************/
  mpi_init(argc, argv);

#ifdef MPI
  if(T==0) {
    fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id);
    EXIT(2);
  }
#endif

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
    EXIT(1);
  }

  geometry();

  /* number of sites per time slice; used as loop bound and index stride in
   * the time-slice sum below, so it must be set before that loop runs */
  VOL3 = LX*LY*LZ;

  /****************************************
   * allocate memory for the contractions
   ****************************************/
  disc  = (double*)calloc(16*VOLUME, sizeof(double));
  if( disc == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for disc\n");
    EXIT(3);
  }

  /* work, bias, tmp: T_global complex numbers as interleaved (re, im) pairs */
  work  = (double*)calloc(2*T_global, sizeof(double));
  if( work == (double*)NULL ) { 
    fprintf(stderr, "[pi_ud_tp0] could not allocate memory for work\n");
    EXIT(5);
  }
  bias  = (double*)calloc(2*T_global, sizeof(double));
  if( bias == (double*)NULL ) { 
    fprintf(stderr, "[pi_ud_tp0] could not allocate memory for bias\n");
    EXIT(6);
  }
  tmp = (double*)calloc(2*T_global, sizeof(double));
  if( tmp == (double*)NULL ) { 
    fprintf(stderr, "[pi_ud_tp0] could not allocate memory for tmp\n");
    EXIT(8);
  }

  /***********************************************
   * start loop on gauge id.s 
   ***********************************************/
  for(gid=g_gaugeid; gid<=g_gaugeid2; gid+=g_gauge_step) {
    memset(work, 0, 2*T_global*sizeof(double));
    memset(bias, 0, 2*T_global*sizeof(double));
 
    count = 0;
    /***********************************************
     * start loop on source id.s 
     ***********************************************/
    for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) {
      memset(disc, 0, 16*VOLUME*sizeof(double));

      ratime = CLOCK;
      sprintf(filename, "jc_ud_x.%.4d.%.4d", gid, sid);
      status = read_lime_contraction(disc, filename, 4, 0);

      if(status!=0) {
        fprintf(stderr, "Error, could not read contraction data from file %s\n", filename);
        EXIT(7);
      }

      retime = CLOCK;
      if(g_cart_id==0) fprintf(stdout, "# time to read contractions: %e seconds\n", retime-ratime);

      count++;

      ratime = CLOCK;

      // add current to sum: components 1..3 summed over each time slice
      for(it=0; it<T; it++) {
        tmp[2*it  ] = 0.;
        tmp[2*it+1] = 0.;
        for(iix=0; iix<VOL3; iix++) {
          ix = it * VOL3 + iix;
          tmp[2*it  ] += disc[_GWI(1,ix,VOLUME)  ] + disc[_GWI(2,ix,VOLUME)  ] + disc[_GWI(3,ix,VOLUME)  ];
          tmp[2*it+1] += disc[_GWI(1,ix,VOLUME)+1] + disc[_GWI(2,ix,VOLUME)+1] + disc[_GWI(3,ix,VOLUME)+1];
        }
      }

      for(it=0; it<2*T_global; it++) { work[it] += tmp[it]; }

      // add to bias
      /* NOTE(review): the result is stored at the base time `it`, not at the
       * offset `ir`; confirm this is the intended index */
      for(it=0; it<T_global; it++) {
      for(ir=0; ir<T_global; ir++) {
        it2 = (it + ir ) % T_global;

        _co_eq_co_ti_co( &w, (complex*)&(tmp[2*it2]), (complex*)&(tmp[2*it]) );
        bias[2*it  ] += w.re;
        bias[2*it+1] += w.im;
      }}
      retime = CLOCK;
      if(g_cart_id==0) fprintf(stdout, "# [pi_ud_tp0] time to calculate contractions: %e seconds\n", retime-ratime);

      if(count==Nsave) {
        /* restart the clock so the message below times only the save step */
        ratime = CLOCK;

        /* disc is reused as the output buffer: T_global (re, im) pairs */
        memset(disc, 0, 2*T_global*sizeof(double));
 
        for(it=0; it<T_global; it++) {
        for(ir=0; ir<T_global; ir++) {
          it2 = (it + ir ) % T_global;

          _co_eq_co_ti_co( &w, (complex*)&(work[2*it2]), (complex*)&(work[2*it]) );
          disc[2*it  ] += w.re;
          disc[2*it+1] += w.im;
        }}

        for(it=0; it<2*T_global; it++) {
          disc[it] -= bias[it];
        }
        

        sprintf(filename, "pi_ud_t.%.4d.%.4d", gid, count);
        ofs = fopen(filename, "w");
        if(ofs == NULL) {
          fprintf(stderr, "[pi_ud_tp0] Error, could not open file %s for writing\n", filename);
          EXIT(8);
        }
        fprintf(ofs, "# [pi_ud_tp0] results for disc. t-dependent correlator at zero spatial momentum\n# %s", ctime(&g_the_time));
        /* real parts live at even offsets, so time it is read at disc[2*it] */
        fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 11, 1, 0, disc[0], 0., Nconf);
        for(it=1; it<T_global/2; it++) {
          fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 11, 1, it, disc[2*it], disc[2*(T_global-it)], Nconf);
        }
        fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 11, 1, T_global/2, disc[2*(T_global/2)], 0., Nconf);
        fclose(ofs);
        retime = CLOCK;
        if(g_cart_id==0) fprintf(stdout, "# [pi_ud_tp0] time to save cvc results: %e seconds\n", retime-ratime);
      }  // of count == Nsave
    }    // of loop on sid
  }      // of loop on gid

  /***********************************************
   * free the allocated memory, finalize 
   ***********************************************/
  free_geometry();
  free(disc);
  free(work);
  free(bias);
  free(tmp);

  if(g_cart_id == 0) {
    fprintf(stdout, "# [pi_ud_tp0] %s# [pi_ud_tp0] end of run\n", ctime(&g_the_time));
    fflush(stdout);
    fprintf(stderr, "# [pi_ud_tp0] %s# [pi_ud_tp0] end of run\n", ctime(&g_the_time));
    fflush(stderr);
  }
#ifdef MPI
  MPI_Finalize();
#endif
  return(0);
}
Example #11
0
File: jc_corr.c Project: etmc/cvc
/*
 * Entry point of jc_corr.
 *
 * For every gauge id in [g_gaugeid, g_gaugeid2] this
 *  1. reads nprop fields jc_ud_x.<gid>.<sid> and reduces each to three
 *     T-point complex time series (data2),
 *  2. sums the complex products of those series over all pairs sid1 < sid2,
 *     binned by the time offset x1 (data3), and scales by fnorm,
 *  3. builds cumulative sums weighted by x0^2 and x0^4 (mom2, mom4),
 *  4. writes data3 to pi_ud_tp0.<gid>.<nprop> and the weighted sums to
 *     pi_ud_mom.<gid>.<nprop>.
 *
 * Returns 0 on success; error paths call exit() with a step-specific code.
 */
int main(int argc, char **argv) {
 
  int Thm1; 
  int c, i, mu;
  int filename_set = 0;
  int l_LX_at, l_LXstart_at;
  int x0, x1, y0;
  int ix, iy, idx1, idx2;
  int VOL3;
  int sid1, sid2, gid;
  size_t nprop=0;
  double *data=NULL, *data2=NULL, *data3=NULL;
  double fnorm;
  double *mom2=NULL, *mom4=NULL;
  char filename[100];
  double ratime, retime;
  FILE *ofs=NULL;

  while ((c = getopt(argc, argv, "h?f:")) != -1) {
    switch (c) {
    case 'f':
      /* filename is a fixed-size buffer; refuse arguments that do not fit */
      if(strlen(optarg) >= sizeof(filename)) {
        fprintf(stderr, "[jc_corr] Error, input filename too long\n");
        exit(1);
      }
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.\n");
    usage();
  }
  /* nprop below divides by the step and assumes an ascending id range */
  if( (g_sourceid_step <= 0) || (g_sourceid2 < g_sourceid) ) {
    fprintf(stderr, "[jc_corr] Error, need source id step > 0 and sourceid2 >= sourceid\n");
    exit(2);
  }

  fprintf(stdout, "\n**************************************************\n");
  fprintf(stdout, "* jc_corr\n");
  fprintf(stdout, "**************************************************\n\n");

  T            = T_global;
  Thm1         = T / 2 - 1;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  VOL3         = LX*LY*LZ;
  /* set explicitly: it is printed and sizes the `data` buffer below */
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
  fprintf(stdout, "# [%2d] parameters:\n"\
                  "#       T            = %3d\n"\
                  "#       Tstart       = %3d\n"\
                  "#       l_LX_at      = %3d\n"\
                  "#       l_LXstart_at = %3d\n"\
                  "#       FFTW_LOC_VOLUME = %3d\n", 
                  g_cart_id, T, Tstart, l_LX_at, l_LXstart_at, FFTW_LOC_VOLUME);

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
    exit(1);
  }

  geometry();

  /****************************************
   * allocate memory for the contractions
   ****************************************/
  nprop = (size_t)(g_sourceid2 - g_sourceid) / (size_t)g_sourceid_step + 1;
  fprintf(stdout, "\n# [jc_corr] number of stoch. propagators = %zu\n", nprop);
  /* the pair sum and fnorm (division by nprop-1) need at least two sources */
  if(nprop < 2) {
    fprintf(stderr, "[jc_corr] Error, need at least two stochastic propagators\n");
    exit(2);
  }

  data = (double*)calloc(8*FFTW_LOC_VOLUME, sizeof(double));
  if( data==NULL ) { 
    fprintf(stderr, "could not allocate memory for data\n");
    exit(3);
  }

  /* nprop * T * 3(i=1,2,3) * 2(real and imaginary part) */
  data2 = (double*)calloc(nprop*T*6, sizeof(double));
  if( data2==NULL ) { 
    fprintf(stderr, "could not allocate memory for data2\n");
    exit(3);
  }

  data3 = (double*)calloc(2*T, sizeof(double));
  if( data3==NULL ) { 
    fprintf(stderr, "could not allocate memory for data3\n");
    exit(3);
  }

  /* the moment loops below write entries 0 .. Thm1-1; allocate at least one
   * element so a tiny T does not turn into a zero-size request */
  mom2 = (double*)calloc((size_t)(Thm1 > 0 ? Thm1 : 1), sizeof(double));
  mom4 = (double*)calloc((size_t)(Thm1 > 0 ? Thm1 : 1), sizeof(double));
  if( (mom2==NULL) || (mom4==NULL) ) {
    fprintf(stderr, "could not allocate memory for mom2 / mom4\n");
    exit(3);
  }


  fnorm = 1. / ( (double)nprop * (double)(nprop-1) * (double)(LX*LY*LZ) );
  fprintf(stdout, "\n# [jc_corr] fnorm = %25.16e\n", fnorm);

  /***********************************************
   * start loop on gauge id.s 
   ***********************************************/
  for(gid=g_gaugeid; gid<=g_gaugeid2; gid++) {

    /* data2 is filled with += below, so clear it for every gauge id */
    memset(data2, 0, nprop*T*6*sizeof(double));

    /* calculate the t-dependent current at zero spatial momentum */
    for(sid1=0; sid1<nprop; sid1++) {
      sprintf(filename, "jc_ud_x.%.4d.%.4d", gid, g_sourceid + sid1*g_sourceid_step);
      if(read_lime_contraction(data, filename, 4, 0) != 0) {
        fprintf(stderr, "\n[jc_corr] Error, could not read field no. %d\n", sid1);
        exit(15);
      }

      for(mu=0;mu<3;mu++) {
        for(x0=0;x0<T;x0++) {
          ix = g_ipt[x0][0][0][0];
          /* NOTE(review): only 4 components are read above, yet component
           * 5*(mu+1) is addressed here, while pi_ud_tp0 reads components
           * 1..3 of the same files; confirm against the _GWI definition */
          ix = _GWI(5*(mu+1), ix, VOLUME);
          /* sum VOL3 consecutive (re, im) pairs starting at that offset */
          for(iy=0;iy<VOL3;iy++) {
            data2[2*(sid1*3*T + mu*T + x0)  ] += data[ix + 2*iy  ];
            data2[2*(sid1*3*T + mu*T + x0)+1] += data[ix + 2*iy+1];
          }
        }
      }

    }

    /***********************************************
     * calculate the correlator
     *  - remember: x1 is the time difference of the correlator,
     *    x0 and y0 are the time coordinates of the currents
     ***********************************************/
    ratime = (double)clock() / CLOCKS_PER_SEC;
    for(i=0;i<2*T; i++) data3[i] = 0.;
    for(sid1=0; sid1<nprop-1; sid1++) {
    for(sid2=sid1+1; sid2<nprop; sid2++) {
      for(y0=0;y0<T;y0++) {
        for(x1=0;x1<T;x1++) {
          x0 = (y0 + x1) % T;
          /* same complex product for each of the three components */
          for(mu=0; mu<3; mu++) {
            idx1 = 2 * ( sid1*3*T + mu*T + y0 );
            idx2 = 2 * ( sid2*3*T + mu*T + x0 );
            // real part of the product
            data3[2*x1  ] += data2[idx1  ] * data2[idx2  ] - data2[idx1+1]*data2[idx2+1];
            // imaginary part of the product
            data3[2*x1+1] += data2[idx1+1] * data2[idx2  ] + data2[idx1  ]*data2[idx2+1];
          }
        }
      }
    }}  // of sid2 and sid1


    // normalization
    for(x0=0;x0<2*T;x0++) { data3[x0] *= fnorm; }

    for(x0=0;x0<Thm1;x0++) {
      mom2[x0] = 0.;
      mom4[x0] = 0.;
    }

    /* cumulative sums over x0 = 1 .. T/2-1 of the real parts at x0 and T-x0,
     * weighted by x0^2 and x0^4; entry x0-1 holds the sum up to x0 */
    for(x0=1;x0<T/2;x0++) {
      if(x0==1) {
        mom2[0] = ( data3[2*x0] + data3[2*(T-x0)] ) * (double)(x0*x0);
        mom4[0] = ( data3[2*x0] + data3[2*(T-x0)] ) * (double)(x0*x0*x0*x0);
      } else {
        mom2[x0-1] = mom2[x0-2] + ( data3[2*x0] + data3[2*(T-x0)] ) * (double)(x0*x0);
        mom4[x0-1] = mom4[x0-2] + ( data3[2*x0] + data3[2*(T-x0)] ) * (double)(x0*x0*x0*x0);
      }
    }
    for(i=0;i<Thm1;i++) mom2[i] /= 6.;
    for(i=0;i<Thm1;i++) mom4[i] /= 72.;


    /************************************************
     * save results in position space
     ************************************************/
    /* NOTE(review): "%4d" pads gid with blanks, so the file name can contain
     * spaces; the other tools use "%.4d" -- confirm before changing */
    sprintf(filename, "pi_ud_tp0.%4d.%.4d", gid, (int)nprop);
    ofs = fopen(filename, "w");
    if (ofs==NULL) {
     fprintf(stderr, "\n[jc_corr] Error, could not open file %s for writing\n", filename);
     exit(9);
    }
    /* data3 is interleaved (re, im): the real part at time t is data3[2*t] */
    fprintf(ofs, "0 1  0%25.16e%25.16e%d\n", data3[0], 0., gid);
    for(x0=1;x0<=Thm1;x0++)
      fprintf(ofs, "0 1 %2d%25.16e%25.16e%d\n", x0, data3[2*x0], data3[2*(T-x0)], gid);
    /* here x0 == T/2, the point with no distinct partner */
    fprintf(ofs, "0 1 %2d%25.16e%25.16e%d\n", x0, data3[2*x0], 0., gid);
    fclose(ofs);

    sprintf(filename, "pi_ud_mom.%4d.%.4d", gid, (int)nprop);
    ofs = fopen(filename, "w");
    if (ofs==NULL) {
     fprintf(stderr, "\n[jc_corr] Error, could not open file %s for writing\n", filename);
     exit(9);
    }
    for(i=0;i<Thm1;i++)
      fprintf(ofs, "%2d%25.16e%25.16e\n", i, mom2[i], mom4[i]);
    fclose(ofs);
    
    retime = (double)clock() / CLOCKS_PER_SEC;
    if(g_cart_id == 0) fprintf(stdout, "# time for building correl.: %e seconds\n", retime-ratime);

  }  /* of loop on gid */

  /***********************************************
   * free the allocated memory, finalize 
   ***********************************************/
  free_geometry();
  free(data);
  free(data2);
  free(data3);
  free(mom2);
  free(mom4);
  return(0);

}
Example #12
0
File: get_corr.c Project: etmc/cvc
int main(int argc, char **argv) {
  
  int c, mu, status;
  int filename_set = 0;
  int mode = 0;
  int l_LX_at, l_LXstart_at;
  int x0, x1, x2, x3, ix, iix, iiy, gid;
  int Thp1, nclass;
  int *oh_count=(int*)NULL, *oh_id=(int*)NULL, oh_nc;
  int *picount;
  double *conn = (double*)NULL;
  double *conn2 = (double*)NULL;
  double **oh_val=(double**)NULL;
  double q[4], qsqr;
  int verbose = 0;
  char filename[800];
  double ratime, retime;
  FILE *ofs;
  fftw_complex *corrt=NULL;

  fftw_complex *pi00=(fftw_complex*)NULL, *pijj=(fftw_complex*)NULL, *piavg=(fftw_complex*)NULL;

  fftw_plan plan_m;

  while ((c = getopt(argc, argv, "h?vf:m:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'm':
      mode = atoi(optarg);
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }

  /* initialize MPI parameters */
  mpi_init(argc, argv);

  /* initialize fftw, create plan with FFTW_FORWARD ---  in contrast to
   * FFTW_BACKWARD in e.g. avc_exact */
  plan_m = fftw_create_plan(T_global, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE);
  if(plan_m==NULL) {
    fprintf(stderr, "Error, could not create fftw plan\n");
    return(1);
  }

  T            = T_global;
  Thp1         = T/2 + 1;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
  fprintf(stdout, "# [%2d] fftw parameters:\n"\
                  "# [%2d] T            = %3d\n"\
		  "# [%2d] Tstart       = %3d\n"\
		  "# [%2d] l_LX_at      = %3d\n"\
		  "# [%2d] l_LXstart_at = %3d\n"\
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
    exit(1);
  }

  geometry();

  /****************************************
   * allocate memory for the contractions *
   ****************************************/
  conn = (double*)calloc(32*VOLUME, sizeof(double));
  if( (conn==(double*)NULL) ) {
    fprintf(stderr, "could not allocate memory for contr. fields\n");
    exit(3);
  }

/*
  conn2 = (double*)calloc(32*VOLUME, sizeof(double));
  if( (conn2==(double*)NULL) ) {
    fprintf(stderr, "could not allocate memory for contr. fields\n");
    exit(4);
  }

  pi00 = (fftw_complex*)malloc(VOLUME*sizeof(fftw_complex));
  if( (pi00==(fftw_complex*)NULL) ) {
    fprintf(stderr, "could not allocate memory for pi00\n");
    exit(2);
  }

  pijj = (fftw_complex*)fftw_malloc(VOLUME*sizeof(fftw_complex));
  if( (pijj==(fftw_complex*)NULL) ) {
    fprintf(stderr, "could not allocate memory for pijj\n");
    exit(2);
  }
*/
  corrt = fftw_malloc(T*sizeof(fftw_complex));

  for(gid=g_gaugeid; gid<=g_gaugeid2; gid++) {

//    for(ix=0; ix<VOLUME; ix++) {pi00[ix].re = 0.; pi00[ix].im = 0.;}
//    for(ix=0; ix<VOLUME; ix++) {pijj[ix].re = 0.; pijj[ix].im = 0.;}
    /***********************
     * read contractions   *
     ***********************/
    ratime = (double)clock() / CLOCKS_PER_SEC;

    sprintf(filename, "%s", filename_prefix);
    fprintf(stdout, "# Reading data from file %s\n", filename);
    status = read_lime_contraction(conn, filename, 16, 0);
    if(status == 106) {
      fprintf(stderr, "Error: could not read from file %s; status was %d\n", filename, status);
      continue;
    }
/*
    sprintf(filename, "%s.%.4d.%.4d", filename_prefix2, gid);
    fprintf(stdout, "# Reading data from file %s\n", filename);
    status = read_lime_contraction(conn2, filename, 16, 0);
    if(status == 106) {
      fprintf(stderr, "Error: could not read from file %s; status was %d\n", filename, status);
      continue;
    }
*/
    retime = (double)clock() / CLOCKS_PER_SEC;
    fprintf(stdout, "# time to read contractions %e seconds\n", retime-ratime);

    /***********************
     * fill the correlator *
     ***********************/
    ratime = (double)clock() / CLOCKS_PER_SEC;
/*
    for(x1=0; x1<LX; x1++) {
    for(x2=0; x2<LY; x2++) {
    for(x3=0; x3<LZ; x3++) {
      for(x0=0; x0<T; x0++) {
        iix = g_ipt[0][x1][x2][x3]*T+x0;
        for(mu=1; mu<4; mu++) {
          ix = _GWI(5*mu,g_ipt[x0][x1][x2][x3],VOLUME);
          pijj[iix].re += ( conn[ix  ] - conn2[ix  ] ) * (double)Nsave / (double)(Nsave-1);
          pijj[iix].im += ( conn[ix+1] - conn2[ix+1] ) * (double)Nsave / (double)(Nsave-1);
        }
        ix = 2*g_ipt[x0][x1][x2][x3];
        pi00[iix].re += ( conn[ix  ] - conn2[ix  ] ) * (double)Nsave / (double)(Nsave-1);
        pi00[iix].im += ( conn[ix+1] - conn2[ix+1] ) * (double)Nsave / (double)(Nsave-1);
      }
    }}}
*/
    for(x0=0; x0<T; x0++) {
      ix = g_ipt[x0][0][0][0];
      corrt[x0].re = conn[_GWI(5,ix,VOLUME)  ] + conn[_GWI(10,ix,VOLUME)  ] + conn[_GWI(15,ix,VOLUME)  ];
      corrt[x0].im = conn[_GWI(5,ix,VOLUME)+1] + conn[_GWI(10,ix,VOLUME)+1] + conn[_GWI(15,ix,VOLUME)+1];
      corrt[x0].re /= (double)T;
      corrt[x0].im /= (double)T;
    }
/*    fftw(plan_m, 1, corrt, 1, T, (fftw_complex*)NULL, 0, 0); */
    fftw_one(plan_m, corrt, NULL);
    sprintf(filename, "rho.%.4d", gid);
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    fprintf(stdout, "# writing VKVK data to file %s\n", filename);
    fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu);
    
    fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, 0, corrt[0].re, 0., gid);
    for(x0=1; x0<(T/2); x0++) {
      fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, x0, 
        corrt[x0].re, corrt[T-x0].re, gid);
    }
    fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, (T/2), corrt[T/2].re, 0., gid);

    retime = (double)clock() / CLOCKS_PER_SEC;
    fprintf(stdout, "# time to fill correlator %e seconds\n", retime-ratime);

#ifdef _UNDEF
 
    free(conn);
/*    free(conn2); */

    /********************************
     * test: print correl to stdout *
     ********************************/
/*
  fprintf(stdout, "\n\n# *****************   pijj   *****************\n");
  for(ix=0; ix<LX*LY*LZ; ix++) {
    iix = ix*T;
    for(x0=0; x0<T; x0++) {
      fprintf(stdout, "%6d%3d%25.16e%25.16e\n", ix, x0, pijj[iix+x0].re, pijj[iix+x0].im);
    }
  }
  fprintf(stdout, "\n\n# *****************   pi00   *****************\n");
  for(ix=0; ix<LX*LY*LZ; ix++) {
    iix = ix*T;
    for(x0=0; x0<T; x0++) {
      fprintf(stdout, "%6d%3d%25.16e%25.16e\n", ix, x0, pi00[iix+x0].re, pi00[iix+x0].im);
    }
  }
*/

    /*****************************************
     * do the reverse Fourier transformation *
     *****************************************/
    ratime = (double)clock() / CLOCKS_PER_SEC;
    fftw(plan_m, LX*LY*LZ,  pi00, 1, T, (fftw_complex*)NULL, 0, 0);
    fftw(plan_m, LX*LY*LZ,  pijj, 1, T, (fftw_complex*)NULL, 0, 0);

    for(ix=0; ix<VOLUME; ix++) {
      pi00[ix].re /= (double)T; pi00[ix].im /= (double)T;
      pijj[ix].re /= 3.*(double)T; pijj[ix].im /= 3.*(double)T;
    }
    retime = (double)clock() / CLOCKS_PER_SEC;
    fprintf(stdout, "# time for Fourier transform %e seconds\n", retime-ratime);

  /*****************************************
   * write to file
   *****************************************/
  ratime = (double)clock() / CLOCKS_PER_SEC;
  sprintf(filename, "pi00.%.4d", gid);
  if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
    fprintf(stderr, "Error: could not open file %s for writing\n", filename);
    exit(5);
  }
  fprintf(stdout, "# writing pi00-data to file %s\n", filename);
  fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu);
  for(x1=0; x1<LX; x1++) {
  for(x2=0; x2<LY; x2++) {
  for(x3=0; x3<LZ; x3++) {
    ix = g_ipt[0][x1][x2][x3]*T;
/*    fprintf(ofs, "# px=%3d, py=%3d, pz=%3d\n", x1, x2, x3); */
    for(x0=0; x0<T; x0++) {
/*      fprintf(ofs, "%3d%25.16e%25.16e\n", x0, pi00[ix+x0].re, pi00[ix+x0].im); */
      fprintf(ofs, "%3d%3d%3d%3d%25.16e%25.16e\n", x1, x2, x3, x0, pi00[ix+x0].re, pi00[ix+x0].im);
    }
  }}}
  fclose(ofs);

  sprintf(filename, "pijj.%.4d", gid);
  if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
    fprintf(stderr, "Error: could not open file %s for writing\n", filename);
    exit(5);
  }
  fprintf(stdout, "# writing pijj-data to file %s\n", filename);
  fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu);
  for(x1=0; x1<LX; x1++) {
  for(x2=0; x2<LY; x2++) {
  for(x3=0; x3<LZ; x3++) {
    ix = g_ipt[0][x1][x2][x3]*T;
/*    fprintf(ofs, "# px=%3d, py=%3d, pz=%3d\n", x1, x2, x3); */
    for(x0=0; x0<T; x0++) {
/*      fprintf(ofs, "%3d%25.16e%25.16e\n", x0, pijj[ix+x0].re, pijj[ix+x0].im); */
      fprintf(ofs, "%3d%3d%3d%3d%25.16e%25.16e\n", x1, x2, x3, x0, pijj[ix+x0].re, pijj[ix+x0].im);
    }
  }}}
  fclose(ofs);

  retime = (double)clock() / CLOCKS_PER_SEC;
  fprintf(stdout, "# time to write correlator %e seconds\n", retime-ratime);

/*
  if(mode==0) {
    ratime = (double)clock() / CLOCKS_PER_SEC;
    if( (picount = (int*)malloc(VOLUME*sizeof(int))) == (int*)NULL) exit(110);
    sprintf(filename, "corr.00.mom");
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    for(ix=0; ix<VOLUME; ix++) picount[ix] = 0;
    for(x1=0; x1<LX; x1++) {
      q[1] = 2. * sin(M_PI * (double)x1 / (double)LX);
    for(x2=0; x2<LY; x2++) {
      q[2] = 2. * sin(M_PI * (double)x2 / (double)LY);
    for(x3=0; x3<LZ; x3++) {
      q[3] = 2. * sin(M_PI * (double)x3 / (double)LZ);
      qsqr = q[1]*q[1] + q[2]*q[2] + q[3]*q[3]; 
      if( qsqr>=g_qhatsqr_min-_Q2EPS && qsqr<= g_qhatsqr_max+_Q2EPS ) {
        ix = g_ipt[0][x1][x2][x3];
        picount[ix] = 1;
        fprintf(ofs, "%3d%3d%3d%6d%25.16e\n", x1, x2, x3, ix, qsqr);
      }
    }}}
    fclose(ofs);
    sprintf(filename, "corr_00.00.%.4d", gid);
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    fprintf(stdout, "# writing corr_00-data to file %s\n", filename);
    fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu);
    for(ix=0; ix<VOLUME; ix++) {
      if(picount[ix]>0) {
        for(x0=0; x0<T; x0++) {
          fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pi00[ix*T+x0].re, pi00[ix*T+x0].im);
        }
      }
    }
    fclose(ofs);
    sprintf(filename, "corr_jj.00.%.4d", gid);
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    fprintf(stdout, "# writing corr_jj-data to file %s\n", filename);
    fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu);
    for(ix=0; ix<VOLUME; ix++) {
      if(picount[ix]>0) {
        for(x0=0; x0<T; x0++) {
          fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pijj[ix*T+x0].re, pijj[ix*T+x0].im);
        }
      }
    }
    fclose(ofs);
    retime = (double)clock() / CLOCKS_PER_SEC;
    fprintf(stdout, "# time for O_h averaging %e seconds\n", retime-ratime);
    free(picount);
  } else if(mode==1) {
    ratime = (double)clock() / CLOCKS_PER_SEC;
    if( (picount = (int*)malloc(VOLUME*sizeof(int))) == (int*)NULL) exit(110);
    sprintf(filename, "corr.01.mom");
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    if( (picount = (int*)malloc(VOLUME*sizeof(int))) == (int*)NULL) exit(110);
    for(ix=0; ix<VOLUME; ix++) picount[ix] = 0;
    for(x1=0; x1<LX; x1++) {
      q[1] = 2. * M_PI * (double)x1 / (double)LX;
    for(x2=0; x2<LY; x2++) {
      q[2] = 2. * M_PI * (double)x2 / (double)LY;
    for(x3=0; x3<LZ; x3++) {
      q[3] = 2. * M_PI * (double)x3 / (double)LZ;
      qsqr = q[1]*q[1] + q[2]*q[2] + q[3]*q[3]; 
      if( qsqr>=g_qhatsqr_min-_Q2EPS && qsqr<= g_qhatsqr_max+_Q2EPS ) {
        ix = g_ipt[0][x1][x2][x3];
        picount[ix] = 1;
        fprintf(ofs, "%3d%3d%3d%6d%25.16e\n", x1, x2, x3, ix, qsqr);
      }
    }}}
    fclose(ofs);
    sprintf(filename, "corr_00.01.%.4d", gid);
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    fprintf(stdout, "# writing corr_01-data to file %s\n", filename);
    fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu);
    for(ix=0; ix<VOLUME; ix++) {
      if(picount[ix]>0) {
        for(x0=0; x0<T; x0++) {
          fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pi00[ix*T+x0].re, pi00[ix*T+x0].im);
        }
      }
    }
    fclose(ofs);
    sprintf(filename, "corr_jj.01.%.4d", gid);
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    fprintf(stdout, "# writing corr_jj-data to file %s\n", filename);
    fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu);
    for(ix=0; ix<VOLUME; ix++) {
      if(picount[ix]>0) {
        for(x0=0; x0<T; x0++) {
          fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pijj[ix*T+x0].re, pijj[ix*T+x0].im);
        }
      }
    }
    fclose(ofs);
    retime = (double)clock() / CLOCKS_PER_SEC;
    fprintf(stdout, "# time for writing: %e seconds\n", retime-ratime);
    free(picount);
  } else if(mode==2) {
    if(make_H3orbits(&oh_id, &oh_count, &oh_val, &oh_nc) != 0) return(123);
    ratime = (double)clock() / CLOCKS_PER_SEC;
    nclass = oh_nc / Thp1;
    if( (piavg = (fftw_complex*)malloc(oh_nc*sizeof(fftw_complex))) == (fftw_complex*)NULL) exit(110);
    if( (picount = (int*)malloc(oh_nc*sizeof(int))) == (int*)NULL) exit(110);

    for(ix=0; ix<oh_nc; ix++) {
      piavg[ix].re = 0.; 
      piavg[ix].im = 0.;
      picount[ix]  = 0;
    }

    for(ix=0; ix<LX*LY*LZ; ix++) {
      for(x0=0; x0<Thp1; x0++) {
        iix = ix*T+x0;
        iiy = oh_id[ix]*Thp1+x0;
        piavg[iiy].re += pi00[iix].re;
        piavg[iiy].im += pi00[iix].im;
        if(x0>0 && x0<T/2) {
          iix = ix*T+(T-x0);
          piavg[iiy].re += pi00[iix].re;
          piavg[iiy].im += pi00[iix].im;
        }
      }
      picount[oh_id[ix]]++;
    }
    for(ix=0; ix<nclass; ix++) {
      for(x0=0; x0<Thp1; x0++) {
        iix = ix*Thp1+x0;
        if(picount[ix]>0) {
          piavg[iix].re /= (double)picount[ix];
          piavg[iix].im /= (double)picount[ix];
          if(x0>0 && x0<T/2) {
            piavg[iix].re /= 2.;
            piavg[iix].im /= 2.;
          }
        }
      }
    }
    sprintf(filename, "corr02_00.%.4d", gid);
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    fprintf(stdout, "# writing corr-00-data to file %s\n", filename);
    fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu);
    for(x1=0; x1<nclass; x1++) {
      if(oh_val[0][x1]>=g_qhatsqr_min-_Q2EPS && oh_val[0][x1]<=g_qhatsqr_max+_Q2EPS) {
        ix = x1*Thp1;
        for(x0=0; x0<Thp1; x0++) {
          fprintf(ofs, "%25.16e%3d%25.16e%25.16e%5d\n", oh_val[0][x1], x0, piavg[ix+x0].re, piavg[ix+x0].im, 
            picount[x1]);
        }
      }
    }
    fclose(ofs);

    for(ix=0; ix<oh_nc; ix++) {
      piavg[ix].re = 0.; 
      piavg[ix].im = 0.;
      picount[ix]  = 0;
    }

    for(ix=0; ix<LX*LY*LZ; ix++) {
      for(x0=0; x0<Thp1; x0++) {
        iix = ix*T+x0;
        iiy = oh_id[ix]*Thp1+x0;
        piavg[iiy].re += pijj[iix].re;
        piavg[iiy].im += pijj[iix].im;
        if(x0>0 && x0<T/2) {
          iix = ix*T+(T-x0);
          piavg[iiy].re += pijj[iix].re;
          piavg[iiy].im += pijj[iix].im;
        }
      }
      picount[oh_id[ix]]++;
    }
    for(ix=0; ix<nclass; ix++) {
      for(x0=0; x0<Thp1; x0++) {
        iix = ix*Thp1+x0;
        if(picount[ix]>0) {
          piavg[iix].re /= (double)picount[ix];
          piavg[iix].im /= (double)picount[ix];
          if(x0>0 && x0<T/2) {
            piavg[iix].re /= 2.;
            piavg[iix].im /= 2.;
          }
        }
    }}
  
    sprintf(filename, "corr02_jj.%.4d", gid);
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    fprintf(stdout, "# writing corr-jj-data to file %s\n", filename);
    fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu);
    for(x1=0; x1<nclass; x1++) {
      ix = x1*Thp1;
      for(x0=0; x0<Thp1; x0++) {
        fprintf(ofs, "%25.16e%3d%25.16e%25.16e%5d\n", oh_val[0][x1], x0, piavg[ix+x0].re, piavg[ix+x0].im, 
          picount[x1]);
      }
    }
    fclose(ofs);
    sprintf(filename, "corr.02.mom");
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    for(ix=0; ix<VOLUME; ix++) fprintf(ofs, "%5d%25.16e%5d", ix, oh_val[0][ix], picount[ix]);
    fclose(ofs);


    retime = (double)clock() / CLOCKS_PER_SEC;
    fprintf(stdout, "# time for O_h averaging %e seconds\n", retime-ratime);

    free(piavg); free(picount);
  }
*/

#endif
  }

  /***************************************
   * free the allocated memory, finalize *
   ***************************************/
  free(corrt);
  free_geometry();
/*
  free(pi00);
  free(pijj);
*/
  fftw_destroy_plan(plan_m);

  return(0);

}
/*
 * grSstWinOpen - set up the module-level rendering state.
 *
 * What the function does, in order:
 *   - reserves three texture names (default, color, depth) from a counter
 *     that starts at 32*1024*1024;
 *   - reads the "mupen64-screensize" variable through environ_cb and parses
 *     it as "<width>x<height>"; falls back to 640x480 when the variable is
 *     missing or malformed;
 *   - sets the GL viewport to that size;
 *   - probes a list of GL extensions, records the result in the
 *     corresponding *_support flags and reports each finding;
 *   - resets the texture-buffer table and calls FindBestDepthBias(),
 *     init_geometry(), init_textures() and init_combiner().
 *
 * Parameters:
 *   screen_resolution, refresh_rate - only written to the log.
 *   color_format    - stored in lfb_color_fmt.
 *   origin_location - a warning is shown unless it is GR_ORIGIN_UPPER_LEFT.
 *   nColBuffers     - a warning is shown unless it is 2.
 *   nAuxBuffers     - a warning is shown unless it is 1.
 *
 * Returns: always 1.
 */
FX_ENTRY GrContext_t FX_CALL
grSstWinOpen(
             GrScreenResolution_t screen_resolution,
             GrScreenRefresh_t    refresh_rate,
             GrColorFormat_t      color_format,
             GrOriginLocation_t   origin_location,
             int                  nColBuffers,
             int                  nAuxBuffers)
{
   struct retro_variable var = { "mupen64-screensize", 0 };
   // ZIGGY
   // allocate static texture names
   // the initial value should be big enough to support the maximal resolution
   free_texture = 32*1024*1024;
   default_texture = free_texture++;
   color_texture = free_texture++;
   depth_texture = free_texture++;

   LOG("grSstWinOpen(%d, %d, %d, %d, %d %d)\r\n", screen_resolution&~0x80000000, refresh_rate, color_format, origin_location, nColBuffers, nAuxBuffers);

   // default size, overridden below when the frontend supplies a valid one
   width = 640;
   height = 480;
   bool ret = environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var);
   if (ret && var.value)
   {
      // sscanf may have stored the first number before failing on the
      // second, so restore both defaults on a partial match
      if (sscanf(var.value, "%dx%d", &width, &height) != 2)
      {
         width = 640;
         height = 480;
      }
   }
   glViewport(0, 0, width, height);

   lfb_color_fmt = color_format;
   if (origin_location != GR_ORIGIN_UPPER_LEFT) DISPLAY_WARNING("origin must be in upper left corner");
   if (nColBuffers != 2) DISPLAY_WARNING("number of color buffer is not 2");
   if (nAuxBuffers != 1) DISPLAY_WARNING("number of auxiliary buffer is not 1");

   if (isExtensionSupported("GL_ARB_texture_env_combine") == 0 &&
         isExtensionSupported("GL_EXT_texture_env_combine") == 0)
      DISPLAY_WARNING("Your video card doesn't support GL_ARB_texture_env_combine extension");
   if (isExtensionSupported("GL_ARB_multitexture") == 0)
      DISPLAY_WARNING("Your video card doesn't support GL_ARB_multitexture extension");
   if (isExtensionSupported("GL_ARB_texture_mirrored_repeat") == 0)
      DISPLAY_WARNING("Your video card doesn't support GL_ARB_texture_mirrored_repeat extension");

   // the buffer count is hard-coded; the GL query is disabled
   nbAuxBuffers = 4;
   //glGetIntegerv(GL_AUX_BUFFERS, &nbAuxBuffers);
   if (nbAuxBuffers > 0)
      printf("Congratulations, you have %d auxilliary buffers, we'll use them wisely !\n", nbAuxBuffers);

   packed_pixels_support = 0;

   if (isExtensionSupported("GL_EXT_blend_func_separate") == 0)
   {
      DISPLAY_WARNING("GL_EXT_blend_func_separate not supported.\n");
      blend_func_separate_support = 0;
   }
   else
   {
      printf("GL_EXT_blend_func_separate supported.\n");
      blend_func_separate_support = 1;
   }

   // we can assume that non-GLES has GL_EXT_packed_pixels
   // support -it's included since OpenGL 1.2
#ifdef GLES
   if (isExtensionSupported("GL_EXT_packed_pixels") != 0)
#endif
      packed_pixels_support = 1;

   if (isExtensionSupported("GL_ARB_texture_non_power_of_two") == 0)
   {
      // this branch is the "extension missing" case, so say so
      DISPLAY_WARNING("GL_ARB_texture_non_power_of_two not supported.\n");
      npot_support = 0;
   }
   else
   {
      printf("GL_ARB_texture_non_power_of_two supported.\n");
      npot_support = 1;
   }

   if (isExtensionSupported("GL_EXT_fog_coord") == 0)
   {
      DISPLAY_WARNING("GL_EXT_fog_coord not supported.\n");
      fog_coord_support = 0;
   }
   else
   {
      printf("GL_EXT_fog_coord supported.\n");
      fog_coord_support = 1;
   }

   // NOTE(review): the result of this shader-extension probe is not used;
   // the calls are kept in case isExtensionSupported has side effects.
   if (isExtensionSupported("GL_ARB_shading_language_100") &&
         isExtensionSupported("GL_ARB_shader_objects") &&
         isExtensionSupported("GL_ARB_fragment_shader") &&
         isExtensionSupported("GL_ARB_vertex_shader"))
   {}

#ifdef GLES
   if (isExtensionSupported("GL_EXT_texture_format_BGRA8888"))
   {
      printf("GL_EXT_texture_format_BGRA8888 supported.\n");
      bgra8888_support = 1;
   }
   else
   {
      DISPLAY_WARNING("GL_EXT_texture_format_BGRA8888 not supported.\n");
      bgra8888_support = 0;
   }
#endif

   glViewport(0, 0, width, height);
   viewport_width = width;
   viewport_height = height;

   // VP try to resolve z precision issues
   //  glMatrixMode(GL_MODELVIEW);
   //  glLoadIdentity();
   //  glTranslatef(0, 0, 1-zscale);
   //  glScalef(1, 1, zscale);

   widtho = width/2;
   heighto = height/2;

   pBufferWidth = pBufferHeight = -1;

   current_buffer = GL_BACK;

   texture_unit = GL_TEXTURE0;

   // mark every texture buffer slot as unused
   {
      int i;
      for (i=0; i<NB_TEXBUFS; i++)
         texbufs[i].start = texbufs[i].end = 0xffffffff;
   }

   FindBestDepthBias();

   init_geometry();
   init_textures();
   init_combiner();

   return 1;
}
Example #14
0
File: jc_u_tp0.c Project: etmc/cvc
/*
 * main - driver of the jc_u_tp0 program (non-MPI builds only).
 *
 * Steps performed:
 *   1. parse the command line (-f <input file>, default "cvc.input") and
 *      read the input through read_input_parser();
 *   2. check that T_global, LX, LY, LZ and g_kappa are set;
 *   3. initialise the geometry and allocate the gauge field, two spinor
 *      fields and the contraction buffers;
 *   4. for every gauge id in [g_gaugeid, g_gaugeid2]:
 *        - read the gauge field and print its plaquette,
 *        - for every source id in [g_sourceid, g_sourceid2] with stride
 *          g_sourceid_step: read the propagator, apply Q_phi_tbc and
 *          accumulate per-timeslice sums for each direction mu into
 *          corr (uses the gauge links) and corr2 (no gauge links),
 *        - combine all pairs of distinct sources into tcorr / tcorr2 as a
 *          function of the time separation and normalise by fnorm,
 *        - write tcorr to the file "jc_u_tp0.<gid>.<sid>".
 *      tcorr2 is computed but not written; <sid> in the file name is the
 *      value of the loop variable after the source loop has ended.
 *
 * Returns 0 on success; error paths terminate through exit().
 */
int main(int argc, char **argv) {
  
  int c, i, j, mu;
  int count        = 0;
  int filename_set = 0;
  int l_LX_at, l_LXstart_at;
  int x0, x1, x2, x3, ix, it;
  int sid, gid;
  double **corr=NULL, **corr2=NULL;
  double *tcorr=NULL, *tcorr2=NULL;
  double fnorm;
  int nsource=0;
  char filename[100];
  double ratime, retime;
  double plaq; 
  double spinor1[24], spinor2[24], U_[18];
  complex w;
  FILE *ofs; 


#ifdef MPI
//  MPI_Init(&argc, &argv);
  fprintf(stderr, "[jc_ud_x] Error, only non-mpi version implemented\n");
  exit(1);
#endif

  while ((c = getopt(argc, argv, "h?f:")) != -1) {
    switch (c) {
    case 'f':
      /* optarg is user input; refuse names that do not fit the buffer
       * instead of overflowing it */
      if(snprintf(filename, sizeof(filename), "%s", optarg) >= (int)sizeof(filename)) {
        fprintf(stderr, "[jc_ud_x] Error, input filename too long\n");
        exit(1);
      }
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.\n");
    usage();
  }

  fprintf(stdout, "\n**************************************************\n");
  fprintf(stdout, "* jc_ud_x\n");
  fprintf(stdout, "**************************************************\n\n");

  /*********************************
   * initialize MPI parameters 
   *********************************/
  // mpi_init(argc, argv);

  /* initialize */
  T            = T_global;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
  fprintf(stdout, "# [%2d] parameters:\n"
                  "# [%2d] T            = %3d\n"
		  "# [%2d] Tstart       = %3d\n"
		  "# [%2d] l_LX_at      = %3d\n"
		  "# [%2d] l_LXstart_at = %3d\n"
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
    exit(1);
  }

  geometry();

  /*************************************************
   * allocate mem for gauge field and spinor fields
   *************************************************/
  alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND);

  no_fields = 2;
  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  if(g_spinor_field == NULL) {
    fprintf(stderr, "[jc_ud_x] Error, could not allocate spinor field list\n");
    exit(3);
  }
  for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND);

  /****************************************
   * allocate memory for the contractions
   ****************************************/
  /* A non-positive stride would divide by zero or never terminate the
   * source loop; an empty range would leave nothing to contract. */
  if(g_sourceid_step <= 0 || g_sourceid2 < g_sourceid) {
    fprintf(stderr, "[jc_ud_x] Error, invalid source id range or step\n");
    exit(2);
  }
  /* Number of iterations of the source loop below
   * (sid = g_sourceid; sid <= g_sourceid2; sid += g_sourceid_step).
   * Counting them exactly keeps corr[count] inside the allocation even
   * when the range length is not a multiple of the stride. */
  nsource = (g_sourceid2 - g_sourceid) / g_sourceid_step + 1;
  if(g_cart_id==0) fprintf(stdout, "# nsource = %d\n", nsource);

  corr   = (double**)calloc( nsource, sizeof(double*));
  corr2  = (double**)calloc( nsource, sizeof(double*));
  tcorr  = (double*)calloc(T*8, sizeof(double));
  tcorr2 = (double*)calloc(T*8, sizeof(double));
  if(corr == NULL || corr2 == NULL || tcorr == NULL || tcorr2 == NULL) {
    fprintf(stderr, "[jc_ud_x] Error, could not allocate contraction fields\n");
    exit(3);
  }

  /* one contiguous slab per table; corr[i] / corr2[i] point at row i,
   * each row holding 8*T doubles */
  corr[0]  = (double*)calloc( (size_t)nsource*8*T, sizeof(double));
  corr2[0] = (double*)calloc( (size_t)nsource*8*T, sizeof(double));
  if(corr[0] == NULL || corr2[0] == NULL) {
    fprintf(stderr, "[jc_ud_x] Error, could not allocate contraction fields\n");
    exit(3);
  }
  for(i=1;i<nsource;i++) corr[i]  = corr[i-1]  + 8*T;
  for(i=1;i<nsource;i++) corr2[i] = corr2[i-1] + 8*T;

  /***********************************************
   * start loop on gauge id.s 
   ***********************************************/
  for(gid=g_gaugeid; gid<=g_gaugeid2; gid++) {

    snprintf(filename, sizeof(filename), "%s.%.4d", gaugefilename_prefix, gid);
    if(g_cart_id==0) fprintf(stdout, "# reading gauge field from file %s\n", filename);
    read_lime_gauge_field_doubleprec(filename);
    xchange_gauge();
    plaquette(&plaq);
    if(g_cart_id==0) fprintf(stdout, "# measured plaquette value: %25.16e\n", plaq);

    /* reset disc to zero */
    for(ix=0; ix<nsource*8*T; ix++) corr[0][ix]  = 0.;
    for(ix=0; ix<nsource*8*T; ix++) corr2[0][ix] = 0.;

    count=0;
    /***********************************************
     * start loop on source id.s 
     ***********************************************/
    for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) {

      /* read the new propagator to g_spinor_field[0];
       * a read failure abandons the remaining sources of this gauge id */
      ratime = (double)clock() / CLOCKS_PER_SEC;
      if(format==0) {
        snprintf(filename, sizeof(filename), "%s.%.4d.%.2d.inverted", filename_prefix, gid, sid);
        if(read_lime_spinor(g_spinor_field[0], filename, 0) != 0) break;
      }
      else if(format==1) {
        snprintf(filename, sizeof(filename), "%s.%.4d.%.5d.inverted", filename_prefix, gid, sid);
        if(read_cmi(g_spinor_field[0], filename) != 0) {
          fprintf(stderr, "\nError from read_cmi\n");
          break;
        }
      }
      xchange_field(g_spinor_field[0]);
      retime = (double)clock() / CLOCKS_PER_SEC;
      if(g_cart_id==0) fprintf(stdout, "# time to read prop.: %e seconds\n", retime-ratime);

      ratime = (double)clock() / CLOCKS_PER_SEC;

      /* apply [1] = D_tm [0] */
      Q_phi_tbc(g_spinor_field[1], g_spinor_field[0]);
      xchange_field(g_spinor_field[1]);

      retime = (double)clock() / CLOCKS_PER_SEC;
      if(g_cart_id==0) fprintf(stdout, "# time to apply D_W: %e seconds\n", retime-ratime);

      ratime = (double)clock() / CLOCKS_PER_SEC;
      /* calculate real and imaginary part;
       * entry 2*(mu*T+x0) is the real part and the following entry the
       * imaginary part of the sum over the spatial volume at time x0 */
      for(mu=0; mu<4; mu++) {
        for(x0=0; x0<T; x0++) {
          for(x1=0; x1<LX; x1++) {
          for(x2=0; x2<LY; x2++) {
          for(x3=0; x3<LZ; x3++) {
            ix = g_ipt[x0][x1][x2][x3];
            _cm_eq_cm_ti_co(U_, g_gauge_field+_GGI(ix,mu), &(co_phase_up[mu]));
            _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[0][_GSI(g_iup[ix][mu])]);
            _fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
            _fv_mi_eq_fv(spinor2, spinor1);
            _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[1][_GSI(ix)], spinor2);

            corr[count][2*(mu*T+x0)  ] -= 0.5*w.re;
            corr[count][2*(mu*T+x0)+1] -= 0.5*w.im;

            _fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[0][_GSI(ix)]);
            _fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
            _fv_pl_eq_fv(spinor2, spinor1);
            _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[1][_GSI(g_iup[ix][mu])], spinor2);

            corr[count][2*(mu*T+x0)  ] -= 0.5*w.re;
            corr[count][2*(mu*T+x0)+1] -= 0.5*w.im;

            _fv_eq_gamma_ti_fv(spinor1, mu, &g_spinor_field[0][_GSI(ix)]);
            _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[1][_GSI(ix)], spinor1);
            corr2[count][2*(mu*T+x0)  ] -= w.re;
            corr2[count][2*(mu*T+x0)+1] -= w.im;
            
          }}}
        }
      }  // of mu

      count++;
    }  // of sid
    retime = (double)clock() / CLOCKS_PER_SEC;
    if(g_cart_id==0) fprintf(stdout, "# time to calculate contractions: %e seconds\n", retime-ratime);

    for(ix=0;ix<8*T;ix++) tcorr[ix] = 0.;
    for(ix=0;ix<8*T;ix++) tcorr2[ix] = 0.;
    
    /* complex products of all pairs of distinct sources (i<j, both
     * orderings added), binned by the time separation it */
    for(i=0;i<nsource-1;i++) {
    for(j=i+1;j<nsource;j++)   {
      for(mu=0;mu<4;mu++) {
        for(x0=0;x0<T;x0++) {  // times at source
        for(x1=0;x1<T;x1++) {  // times at sink
          it = (x1 - x0 + T) % T;
          // conserved current
          tcorr[2*(mu*T+it)  ] += corr[i][2*(mu*T+x1)] * corr[j][2*(mu*T+x0)  ] - corr[i][2*(mu*T+x1)+1] * corr[j][2*(mu*T+x0)+1];
          tcorr[2*(mu*T+it)+1] += corr[i][2*(mu*T+x1)] * corr[j][2*(mu*T+x0)+1] + corr[i][2*(mu*T+x1)+1] * corr[j][2*(mu*T+x0)  ];
          tcorr[2*(mu*T+it)  ] += corr[j][2*(mu*T+x1)] * corr[i][2*(mu*T+x0)  ] - corr[j][2*(mu*T+x1)+1] * corr[i][2*(mu*T+x0)+1];
          tcorr[2*(mu*T+it)+1] += corr[j][2*(mu*T+x1)] * corr[i][2*(mu*T+x0)+1] + corr[j][2*(mu*T+x1)+1] * corr[i][2*(mu*T+x0)  ];

          // local current
          tcorr2[2*(mu*T+it)  ] += corr2[i][2*(mu*T+x1)] * corr2[j][2*(mu*T+x0)  ] - corr2[i][2*(mu*T+x1)+1] * corr2[j][2*(mu*T+x0)+1];
          tcorr2[2*(mu*T+it)+1] += corr2[i][2*(mu*T+x1)] * corr2[j][2*(mu*T+x0)+1] + corr2[i][2*(mu*T+x1)+1] * corr2[j][2*(mu*T+x0)  ];
          tcorr2[2*(mu*T+it)  ] += corr2[j][2*(mu*T+x1)] * corr2[i][2*(mu*T+x0)  ] - corr2[j][2*(mu*T+x1)+1] * corr2[i][2*(mu*T+x0)+1];
          tcorr2[2*(mu*T+it)+1] += corr2[j][2*(mu*T+x1)] * corr2[i][2*(mu*T+x0)+1] + corr2[j][2*(mu*T+x1)+1] * corr2[i][2*(mu*T+x0)  ];
        }}
      }
    }}

    /* NOTE(review): with a single source nsource*(nsource-1) is zero and
     * fnorm becomes infinite - confirm that nsource >= 2 is required. */
    fnorm = 1. / ( g_prop_normsqr * g_prop_normsqr * (double)(LX*LY*LZ) * (double)(LX*LY*LZ) * nsource * (nsource-1));
    if(g_cart_id==0) fprintf(stdout, "X-fnorm = %e\n", fnorm);
    for(ix=0;ix<8*T;ix++) tcorr[ix]  *= fnorm;
    for(ix=0;ix<8*T;ix++) tcorr2[ix] *= fnorm;

    /************************************************
     * save results
     ************************************************/
    if(g_cart_id == 0) fprintf(stdout, "# save results for gauge id %d and sid %d\n", gid, sid);

    /* save the result in position space */
    snprintf(filename, sizeof(filename), "jc_u_tp0.%.4d.%.4d", gid, sid);
    ofs = fopen(filename, "w");
    if(ofs == (FILE*)NULL) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    for(x0=0;x0<T;x0++) fprintf(ofs, "%d%25.16e%25.16e%25.16e%25.16e%25.16e%25.16e%25.16e%25.16e\n", x0,
       tcorr[2*(0*T+x0)], tcorr[2*(0*T+x0)+1],
       tcorr[2*(1*T+x0)], tcorr[2*(1*T+x0)+1],
       tcorr[2*(2*T+x0)], tcorr[2*(2*T+x0)+1],
       tcorr[2*(3*T+x0)], tcorr[2*(3*T+x0)+1]);
    
    fclose(ofs);

  }  /* of loop on gid */

  /***********************************************
   * free the allocated memory, finalize 
   ***********************************************/
  free(g_gauge_field);
  for(i=0; i<no_fields; i++) free(g_spinor_field[i]);
  free(g_spinor_field);
  free_geometry();
  /* release the data slabs before the row-pointer tables */
  free(corr[0]);
  free(corr);
  free(corr2[0]);
  free(corr2);
  free(tcorr);
  free(tcorr2);

  return(0);

}
Example #15
0
File: apply_Ddw.c Project: etmc/cvc
int main(int argc, char **argv) {
  
  int c, mu, nu, status;
  int i, j, ncon=-1, ir, is, ic, id;
  int filename_set = 0;
  int x0, x1, x2, x3, ix, iix;
  int y0, y1, y2, y3, iy, iiy;
  int start_valuet=0, start_valuex=0, start_valuey=0;
  int num_threads=1, threadid, nthreads;
  int seed, seed_set=0;
  double diff1, diff2;
/*  double *chi=NULL, *psi=NULL; */
  double plaq=0., pl_ts, pl_xs, pl_global;
  double *gauge_field_smeared = NULL;
  double s[18], t[18], u[18], pl_loc;
  double spinor1[24], spinor2[24];
  double *pl_gather=NULL;
  double dtmp;
  complex prod, w, w2;
  int verbose = 0;
  char filename[200];
  char file1[200];
  char file2[200];
  FILE *ofs=NULL;
  double norm, norm2;
  fermion_propagator_type *prop=NULL, prop2=NULL, seq_prop=NULL, seq_prop2=NULL, prop_aux=NULL, prop_aux2=NULL;
  int idx, eoflag, shift;
  float *buffer = NULL;
  unsigned int VOL3;
  size_t items, bytes;

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "h?vf:N:c:C:t:s:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'N':
      ncon = atoi(optarg);
      break;
    case 'c':
      strcpy(file1, optarg);
      break;
    case 'C':
      strcpy(file2, optarg);
      break;
    case 't':
      num_threads = atoi(optarg);
      break;
    case 's':
      seed = atoi(optarg);
      fprintf(stdout, "# [] use seed value %d\n", seed);
      seed_set = 1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  if(g_cart_id==0) fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);


  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n");
    usage();
  }

  /* initialize MPI parameters */
  mpi_init(argc, argv);

  /* initialize T etc. */
  fprintf(stdout, "# [%2d] parameters:\n"\
                  "# [%2d] T_global     = %3d\n"\
                  "# [%2d] T            = %3d\n"\
		  "# [%2d] Tstart       = %3d\n"\
                  "# [%2d] LX_global    = %3d\n"\
                  "# [%2d] LX           = %3d\n"\
		  "# [%2d] LXstart      = %3d\n"\
                  "# [%2d] LY_global    = %3d\n"\
                  "# [%2d] LY           = %3d\n"\
		  "# [%2d] LYstart      = %3d\n",\
		  g_cart_id, g_cart_id, T_global, g_cart_id, T, g_cart_id, Tstart,
		             g_cart_id, LX_global, g_cart_id, LX, g_cart_id, LXstart,
		             g_cart_id, LY_global, g_cart_id, LY, g_cart_id, LYstart);

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
    exit(101);
  }
  geometry();

  if(init_geometry_5d() != 0) {
    fprintf(stderr, "ERROR from init_geometry_5d\n");
    exit(102);
  }
  geometry_5d();

  VOL3 = LX*LY*LZ;

  /* read the gauge field */
  alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND);
  sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf);
  if(g_cart_id==0) fprintf(stdout, "# reading gauge field from file %s\n", filename);

  if(strcmp(gaugefilename_prefix, "identity")==0) {
    status = unit_gauge_field(g_gauge_field, VOLUME);
  } else {
    // status = read_nersc_gauge_field_3x3(g_gauge_field, filename, &plaq);
    // status = read_ildg_nersc_gauge_field(g_gauge_field, filename);
    status = read_lime_gauge_field_doubleprec(filename);
    // status = read_nersc_gauge_field(g_gauge_field, filename, &plaq);
    // status = 0;
  }
  if(status != 0) {
    fprintf(stderr, "[apply_Dtm] Error, could not read gauge field\n");
    exit(11);
  }
  xchange_gauge();

  // measure the plaquette
  if(g_cart_id==0) fprintf(stdout, "# read plaquette value 1st field: %25.16e\n", plaq);
  plaquette(&plaq);
  if(g_cart_id==0) fprintf(stdout, "# measured plaquette value 1st field: %25.16e\n", plaq);

  g_kappa5d = 0.5 / (5. + g_m0);
  fprintf(stdout, "# [] g_kappa5d = %e\n", g_kappa5d);

  no_fields=4;
  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], L5*VOLUMEPLUSRAND);

/*
  items = VOL3 * 288;
  bytes = items * sizeof(float);
  if( (buffer = (float*)malloc( bytes ) ) == NULL ) {
    fprintf(stderr, "[] Error, could not allocate buffer\n");
    exit(20);
  }
*/
  /****************************************
   * read read the spinor fields
   ****************************************/


/*
  prop = create_fp_field(VOL3);
  create_fp(&prop2);
  create_fp(&prop_aux);
  create_fp(&prop_aux2);
  create_fp(&seq_prop);
  create_fp(&seq_prop2);
*/
#ifdef MPI
  if(!seed_set) { seed = g_seed; }
  srand(seed+g_cart_id);
  for(ix=0;ix<VOLUME*L5;ix++) {
    for(i=0;i<24;i++) {
      spinor1[i] = 2* (double)rand() / (double)RAND_MAX - 1.;
    }
    _fv_eq_fv(g_spinor_field[0]+_GSI(ix), spinor1 );
  }
  for(i=0;i<g_nproc;i++) {
    if(g_cart_id==i) {
      if(i==0) ofs = fopen("source", "w");
      else ofs = fopen("source", "a");
      for(is=0;is<L5;is++) { 
        for(x0=0;x0<T; x0++) {
        for(x1=0;x1<LX; x1++) {
        for(x2=0;x2<LX; x2++) {
        for(x3=0;x3<LX; x3++) {
          iix = is*VOLUME*g_nproc + (((x0+g_proc_coords[0]*T)*LX*g_nproc_x+ x1+g_proc_coords[1]*LX )*LY*g_nproc_y + x2+g_proc_coords[2]*LY )*LZ*g_nproc_z + x3+g_proc_coords[3]*LZ;
          ix = g_ipt_5d[is][x0][x1][x2][x3];
          for(c=0;c<24;c++) {
            fprintf(ofs, "%8d%8d%3d%25.16e\n", iix, ix, c, g_spinor_field[0][_GSI(ix)+c]);
          }
        }}}}
      }
      fclose(ofs);
    }
#ifdef MPI
    MPI_Barrier(g_cart_grid);
#endif
  }
#else
  ofs = fopen("source", "r");
  for(ix=0;ix<24*VOLUME*L5;ix++) {
    fscanf(ofs, "%d%d%d%lf", &x1,&x2,&x3, &dtmp);
    g_spinor_field[0][_GSI(x1)+x3] = dtmp;
  }
  fclose(ofs);

#endif
  xchange_field_5d(g_spinor_field[0]);
  Q_DW_Wilson_dag_phi(g_spinor_field[1], g_spinor_field[0]);
  xchange_field_5d(g_spinor_field[1]);
  Q_DW_Wilson_phi(g_spinor_field[2], g_spinor_field[1]);
  sprintf(filename, "prop_%.2d.%.2d", g_nproc, g_cart_id);
  ofs = fopen(filename, "w");
  printf_spinor_field_5d(g_spinor_field[2], ofs);
  fclose(ofs);

//  for(ix=0;ix<VOLUME*L5;ix++) {
//    for(i=0;i<24;i++) {
//      spinor1[i] = 2* (double)rand() / (double)RAND_MAX - 1.;
//    }
//    _fv_eq_fv(g_spinor_field[1]+_GSI(ix), spinor1 );
//  }
/*
  xchange_field_5d(g_spinor_field[0]);
  sprintf(filename, "spinor.%.2d", g_cart_id);
  ofs = fopen(filename, "w");
  printf_spinor_field_5d(g_spinor_field[0], ofs);
  fclose(ofs);
*/
/*
  // 2 = D 0
  Q_DW_Wilson_phi(g_spinor_field[2], g_spinor_field[0]);
  // 3 = D^dagger 1
  Q_DW_Wilson_dag_phi(g_spinor_field[3], g_spinor_field[1]);

  // <1, 2> = <1, D 0 >
  spinor_scalar_product_co(&w, g_spinor_field[1], g_spinor_field[2], VOLUME*L5);
  // <3, 0> = < D^dagger 1, 0 >
  spinor_scalar_product_co(&w2, g_spinor_field[3], g_spinor_field[0], VOLUME*L5);
  fprintf(stdout, "# [] w  = %e + %e*1.i\n", w.re, w.im);
  fprintf(stdout, "# [] w2 = %e + %e*1.i\n", w2.re, w2.im);
  fprintf(stdout, "# [] abs difference = %e \n", sqrt(_SQR(w2.re-w.re)+_SQR(w2.im-w.im)) );
*/

/*
  for(i=0;i<12;i++) {
    fprintf(stdout, "s1[%2d] <- %25.16e + %25.16e*1.i\n", i+1, spinor1[2*i], spinor1[2*i+1]);
  }
  for(i=0;i<24;i++) {
    spinor2[i] = 2* (double)rand() / (double)RAND_MAX - 1.;
  }
  for(i=0;i<12;i++) {
    fprintf(stdout, "s2[%2d] <- %25.16e + %25.16e*1.i\n", i+1, spinor2[2*i], spinor2[2*i+1]);
  }

  _fv_mi_eq_PRe_fv(spinor2, spinor1);
  for(i=0;i<12;i++) {
    fprintf(stdout, "s3[%2d] <- %25.16e + %25.16e*1.i\n", i+1, spinor2[2*i], spinor2[2*i+1]);
  }
*/
/*
  ofs = fopen("dw_spinor", "w");
  Q_DW_Wilson_phi(g_spinor_field[1], g_spinor_field[0]);
  printf_spinor_field(g_spinor_field[1], ofs);
  fclose(ofs);

  g_kappa = g_kappa5d;
  ofs = fopen("wilson_spinor", "w");
  Q_Wilson_phi(g_spinor_field[2], g_spinor_field[0]);
  printf_spinor_field(g_spinor_field[2], ofs);
  fclose(ofs);
*/
#ifdef _UNDEF
  /*******************************************************************
   * propagators
   *******************************************************************/
//  for(i=0; i<12;i++)
  for(i=0; i<1;i++)
  {

    //sprintf(file1, "source.%.4d.t00x00y00z00.%.2d.inverted", Nconf, i);

    sprintf(file1, "/home/mpetschlies/quda-0.3.2/tests/prop");
    if(g_cart_id==0) fprintf(stdout, "# Reading prop. from file %s\n", file1);
    fflush(stdout);
    //if( read_lime_spinor(g_spinor_field[0], file1, 0) != 0 ) {
    ofs = fopen(file1, "rb");
    if( fread(g_spinor_field[0], sizeof(double), 24*L5*VOLUME, ofs) !=  24*L5*VOLUME) {
      fprintf(stderr, "Error, could not read proper amount of data from file %s\n", file1);
      exit(100);
    }
    fclose(ofs);

    for(ix=0;ix<VOLUME*L5;ix++) {
      _fv_ti_eq_re(g_spinor_field[0]+_GSI(ix), 2.*g_kappa5d);
    }

/*
    if( (ofs = fopen("prop_full", "w")) == NULL ) exit(22);
    for(ix=0;ix<L5;ix++) {
      fprintf(ofs, "# [] s = %d\n", ix);
      printf_spinor_field(g_spinor_field[0]+_GSI(ix*VOLUME), ofs);
    }
    fclose(ofs);
*/

    // reorder, multiply with g2
    for(is=0,iix=0; is<L5; is++) {
    for(ix=0; ix<VOLUME; ix++) {
      iiy = lexic2eot_5d (is, ix);
      _fv_eq_fv(spinor1, g_spinor_field[0]+_GSI(iiy));
      _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(iix), 2, spinor1 );
      iix++;
    }}

    Q_DW_Wilson_phi(g_spinor_field[2], g_spinor_field[1]);
//    Q_DW_Wilson_dag_phi(g_spinor_field[2], g_spinor_field[1]);
    fprintf(stdout, "# [] finished  application of Dirac operator\n");
    fflush(stdout);


    // reorder, multiply with g2
    for(is=0, iix=0;is<L5;is++) {
    for(ix=0; ix<VOLUME; ix++) {
      iiy = lexic2eot_5d(is, ix);
      _fv_eq_fv(spinor1, g_spinor_field[2]+_GSI(iix));
      _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(iiy), 2, spinor1 );
      iix++;
    }}

    if( (ofs = fopen("my_out", "w")) == NULL ) exit(23);
    for(ix=0;ix<L5;ix++) {
      fprintf(ofs, "# [] s = %d\n", ix);
      printf_spinor_field(g_spinor_field[1]+_GSI(ix*VOLUME), ofs);
    }
    fclose(ofs);


    sprintf(file1, "/home/mpetschlies/quda-0.3.2/tests/source");
    if(g_cart_id==0) fprintf(stdout, "# Reading prop. from file %s\n", file1);
    fflush(stdout);
    //if( read_lime_spinor(g_spinor_field[0], file1, 0) != 0 ) {
    
    ofs = fopen(file1, "rb");
    if( fread(g_spinor_field[2], sizeof(double), 24*L5*VOLUME, ofs) !=  24*L5*VOLUME) {
      fprintf(stderr, "Error, could not read proper amount of data from file %s\n", file1);
      exit(100);
    }
    fclose(ofs);

    
/*
    if( (ofs = fopen("v_out", "w")) == NULL ) exit(23);
    for(ix=0;ix<L5;ix++) {
      fprintf(ofs, "# [] s = %d\n", ix);
      printf_spinor_field(g_spinor_field[2]+_GSI(ix*VOLUME), ofs);
    }
    fclose(ofs);
*/
    spinor_scalar_product_re(&norm2, g_spinor_field[2], g_spinor_field[2], VOLUME*L5);
    for(ix=0;ix<VOLUME*L5;ix++) {
      _fv_mi_eq_fv(g_spinor_field[1]+_GSI(ix), g_spinor_field[2]+_GSI(ix));
    }
    spinor_scalar_product_re(&norm, g_spinor_field[1], g_spinor_field[1], VOLUME*L5);
    fprintf(stdout, "\n# [] absolut residuum squared: %e; relative residuum %e\n", norm, sqrt(norm/norm2) );

  }  // of loop on spin color indices
#endif
  /***********************************************
   * free the allocated memory, finalize 
   ***********************************************/
  free(g_gauge_field);
  free_geometry();
  if(gauge_field_smeared != NULL) free(gauge_field_smeared);
  if(g_spinor_field != NULL) {
    for(i=0; i<no_fields; i++) free(g_spinor_field[i]);
    free(g_spinor_field);
  }
  free(buffer);

  free_fp_field(&prop);
  free_fp(&prop2);
  free_fp(&prop_aux);
  free_fp(&prop_aux2);
  free_fp(&seq_prop);
  free_fp(&seq_prop2);

  g_the_time = time(NULL);
  fprintf(stdout, "# [] %s# [] end fo run\n", ctime(&g_the_time));
  fflush(stdout);
  fprintf(stderr, "# [] %s# [] end fo run\n", ctime(&g_the_time));
  fflush(stderr);


#ifdef MPI
  MPI_Finalize();
#endif
  return(0);
}
Example #16
0
// Program entry point for the eigenvalue simulation driver.
//
// Builds the run configuration (defaults, parameter file, command line),
// seeds the RNG, creates the output files, geometry, material, tally and
// source bank, runs the eigenvalue calculation (only when compiled with
// OpenMP) and finally releases everything that was allocated here.
//
// Returns 0 on success and 1 if the k-effective array cannot be allocated.
int main(int argc, char *argv[])
{
  Parameters *parameters; // user defined parameters
  Geometry *geometry; // homogenous cube geometry
  Material *material; // problem material
  Bank *source_bank; // array for particle source sites
  Tally *tally; // scalar flux tally
  double *keff; // effective multiplication factor
  // Timers. They are only assigned in the OpenMP section below but are
  // always read by the final printf, so they must start out defined.
  double t1 = 0.0, t2 = 0.0;

  #ifdef _OPENMP
    unsigned long counter = 0; //counter to decide the start pos of master bank copy from sub banks
    Bank *g_fission_bank; //global fission bank
  #endif

  // Get inputs: set parameters to default values, parse parameter file,
  // override with any command line inputs, and print parameters
  parameters = init_parameters();
  parse_parameters(parameters);
  read_CLI(argc, argv, parameters);
  print_parameters(parameters);


  // Set initial RNG seed
  set_initial_seed(parameters->seed);
  set_stream(STREAM_INIT);

  // Create files for writing results to
  init_output(parameters);

  // Set up geometry
  geometry = init_geometry(parameters);

  // Set up material
  material = init_material(parameters);

  // Set up tallies
  tally = init_tally(parameters);

  // Create source bank and initial source distribution
  source_bank = init_source_bank(parameters, geometry);

  // Create fission bank
  #ifdef _OPENMP
    omp_set_num_threads(parameters->n_threads); // Set number of openmp threads
    printf("threads num: %d\n", parameters->n_threads);
    // Allocate one master fission bank
    g_fission_bank = init_bank(2*parameters->n_particles);
  #endif

  // Set up array for k effective
  keff = calloc(parameters->n_active, sizeof(double));
  // calloc may legitimately return NULL for a zero-sized request, so only
  // treat NULL as a failure when at least one entry was asked for.
  if(keff == NULL && parameters->n_active > 0){
    fprintf(stderr, "Error: could not allocate memory for keff\n");
    return 1;
  }

  center_print("SIMULATION", 79);
  border_print();
  printf("%-15s %-15s %-15s\n", "BATCH", "KEFF", "MEAN KEFF");

  #ifdef _OPENMP
    // Start time
    t1 = omp_get_wtime();

    // NOTE(review): fission_bank is not declared in this function;
    // presumably it is a file-scope variable -- verify against the rest
    // of the file.
    run_eigenvalue(counter, g_fission_bank, parameters, geometry, material, source_bank, fission_bank, tally, keff);

    // Stop time
    t2 = omp_get_wtime();
  #endif

  // Without OpenMP nothing was timed and this reports 0 seconds.
  printf("Simulation time: %f secs\n", t2-t1);

  // Free memory
  #ifdef _OPENMP
    free_bank(g_fission_bank);
  #endif

  free(keff);
  free_tally(tally);
  free_bank(source_bank);
  free_material(material);
  free(geometry);
  free(parameters);

  return 0;
}
Example #17
0
File: cvc_2pt.c Project: etmc/cvc
int main(int argc, char **argv) {
  
  int c, i, mu;
  int count        = 0;
  int filename_set = 0;
  int l_LX_at, l_LXstart_at;
  int x0, x1, ix, idx;
  int VOL3;
  int sid;
  double *disc = (double*)NULL;
  int verbose = 0;
  char filename[100];
  double ratime, retime;
  double plaq;
  double spinor1[24], spinor2[24];
  double _2kappamu;
  double *gauge_field_f=NULL, *gauge_field_timeslice=NULL;
  double v4norm = 0., vvnorm = 0.;
  complex w;
  FILE *ofs1, *ofs2;
/*  double sign_adj5[] = {-1., -1., -1., -1., +1., +1., +1., +1., +1., +1., -1., -1., -1., 1., -1., -1.}; */
  double hopexp_coeff[8], addreal, addimag;
  int gindex[]    = { 5 , 1 , 2 , 3 ,  6 ,10 ,11 ,12 , 4 , 7 , 8 , 9 , 0 ,15 , 14 ,13 };
  int isimag[]    = { 0 , 0 , 0 , 0 ,  1 , 1 , 1 , 1 , 0 , 1 , 1 , 1 , 0 , 1 ,  1 , 1 };
  double gsign[]  = {-1., 1., 1., 1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -1., 1.};


#ifdef MPI
  MPI_Status status;
#endif

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "h?vgf:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n");
    usage();
  }

  /* initialize MPI parameters */
  mpi_init(argc, argv);

#ifdef MPI
  T = T_global / g_nproc;
  Tstart = g_cart_id * T;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
  VOL3 = LX*LY*LZ;
#else
  T            = T_global;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
  VOL3 = LX*LY*LZ;
#endif
  fprintf(stdout, "# [%2d] parameters:\n"\
                  "# [%2d] T            = %3d\n"\
		  "# [%2d] Tstart       = %3d\n"\
		  "# [%2d] l_LX_at      = %3d\n"\
		  "# [%2d] l_LXstart_at = %3d\n"\
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(1);
  }

  geometry();

  /* read the gauge field */
  alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND);
  sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf);
  if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename);
  read_lime_gauge_field_doubleprec(filename);
  xchange_gauge();

  /* measure the plaquette */
  plaquette(&plaq);
  if(g_cart_id==0) fprintf(stdout, "# measured plaquette value: %25.16e\n", plaq);

  if(Nlong > -1) {
/*    N_ape     = 5; */
    alpha_ape = 0.4;
    if(g_cart_id==0) fprintf(stdout, "# apply fuzzing of gauge field and propagators with parameters:\n"\
                                     "# Nlong = %d\n# N_ape = %d\n# alpha_ape = %f\n", Nlong, N_ape, alpha_ape);
    alloc_gauge_field(&gauge_field_f, VOLUMEPLUSRAND);
    if( (gauge_field_timeslice = (double*)malloc(72*VOL3*sizeof(double))) == (double*)NULL  ) {
      fprintf(stderr, "Error, could not allocate mem for gauge_field_timeslice\n");
#ifdef MPI
      MPI_Abort(MPI_COMM_WORLD, 1);
      MPI_Finalize();
#endif
      exit(2);
    }
    for(x0=0; x0<T; x0++) {
      memcpy((void*)gauge_field_timeslice, (void*)(g_gauge_field+_GGI(g_ipt[x0][0][0][0],0)), 72*VOL3*sizeof(double));
      for(i=0; i<N_ape; i++) {
        APE_Smearing_Step_Timeslice(gauge_field_timeslice, alpha_ape);
      }
      fuzzed_links_Timeslice(gauge_field_f, gauge_field_timeslice, Nlong, x0);
    }
    free(gauge_field_timeslice);
  }

  /* test: print the fuzzed APE smeared gauge field to stdout */
/*
  for(ix=0; ix<36*VOLUME; ix++) {
    fprintf(stdout, "%6d%25.16e%25.16e%25.16e%25.16e\n", ix, gauge_field_f[2*ix], gauge_field_f[2*ix+1], g_gauge_field[2*ix], g_gauge_field[2*ix+1]);
  }
*/

  /* allocate memory for the spinor fields */
  no_fields = 4;
  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND);

  /* allocate memory for the contractions */
  disc = (double*)calloc(4*16*T*2, sizeof(double));
  if( disc==(double*)NULL ) {
    fprintf(stderr, "could not allocate memory for disc\n");
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(3);
  }
  for(ix=0; ix<4*32*T; ix++) disc[ix] = 0.;

  if(g_cart_id==0) {
    sprintf(filename, "cvc_2pt_disc_vv.%.4d", Nconf);
    ofs1 = fopen(filename, "w");
    sprintf(filename, "cvc_2pt_disc_v4.%.4d", Nconf);
    ofs2 = fopen(filename, "w");
    if(ofs1==(FILE*)NULL || ofs2==(FILE*)NULL) {
#ifdef MPI
        MPI_Abort(MPI_COMM_WORLD, 1);
        MPI_Finalize();
#endif
        exit(5);
    }
  }

  /* add the HPE coefficients */
  if(format==1) {
    addimag = 2*g_kappa*g_mu/sqrt(1 + 4*g_kappa*g_kappa*g_mu*g_mu)* LX*LY*LZ*3*4*2.*g_kappa*g_kappa*4;
    addreal = 1./sqrt(1 + 4*g_kappa*g_kappa*g_mu*g_mu)*LX*LY*LZ*3*4*2.*g_kappa*g_kappa*4;
    v4norm = 1. / ( 8. * g_kappa * g_kappa );
    vvnorm = g_mu / ( 4. * g_kappa );
  } else {
    addimag = 2*g_kappa*g_mu/sqrt(1 + 4*g_kappa*g_kappa*g_mu*g_mu)* LX*LY*LZ*3*4*2.*g_kappa*2;
    addreal = 1./sqrt(1 + 4*g_kappa*g_kappa*g_mu*g_mu)*LX*LY*LZ*3*4*2.*g_kappa*2;
    v4norm = 1. / ( 4. * g_kappa  );
    vvnorm = g_mu / ( 4. * g_kappa );
  }

  /* calculate additional contributions for 1 and gamma_5 */
  _2kappamu = 2.*g_kappa*g_mu;
  hopexp_coeff[0] = 24. * g_kappa * LX*LY*LZ / (1. + _2kappamu*_2kappamu);
  hopexp_coeff[1] = 0.;
  
  hopexp_coeff[2] = -768. * g_kappa*g_kappa*g_kappa * LX*LY*LZ * _2kappamu*_2kappamu /
   ( (1.+_2kappamu*_2kappamu)*(1.+_2kappamu*_2kappamu)*(1.+_2kappamu*_2kappamu) );
  hopexp_coeff[3] = 0.;

  hopexp_coeff[4] = 0.;
  hopexp_coeff[5] = -24.*g_kappa * LX*LY*LZ * _2kappamu / (1. + _2kappamu*_2kappamu);

  hopexp_coeff[6] = 0.;
  hopexp_coeff[7] = -384. * g_kappa*g_kappa*g_kappa * LX*LY*LZ * 
    (1.-_2kappamu*_2kappamu)*_2kappamu /
   ( (1.+_2kappamu*_2kappamu)*(1.+_2kappamu*_2kappamu)*(1.+_2kappamu*_2kappamu) );

  /* start loop on source id.s */
  for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) {
    for(ix=0; ix<4*32*T; ix++) disc[ix] = 0.;

    /* read the new propagator */
    sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid); 
/*    sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid); */
    if(read_lime_spinor(g_spinor_field[1], filename, 0) != 0) {
      fprintf(stderr, "[%2d] Error, could not read from file %s\n", g_cart_id, filename);
#ifdef MPI
      MPI_Abort(MPI_COMM_WORLD, 1);
      MPI_Finalize();
#endif
      exit(4);
    }
    count++;
    xchange_field(g_spinor_field[1]);

    /* calculate the source: apply Q_phi_tbc */
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    Q_phi_tbc(g_spinor_field[0], g_spinor_field[1]);
    xchange_field(g_spinor_field[0]); 
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "# time to apply Q_tm %e seconds\n", retime-ratime);


    /* apply gamma5_BdagH4_gamma5 */
    gamma5_BdagH4_gamma5(g_spinor_field[2], g_spinor_field[0], g_spinor_field[3]);

    /* attention: additional factor 2kappa because of CMI format */
/*
    if(format==1) {
      for(ix=0; ix<VOLUME; ix++) {
        _fv_ti_eq_re(&g_spinor_field[2][_GSI(ix)], 2.*g_kappa);
      }
    }
*/

    if(Nlong>-1) {
      if(g_cart_id==0) fprintf(stdout, "# fuzzing propagator with Nlong = %d\n", Nlong);
      memcpy((void*)g_spinor_field[3], (void*)g_spinor_field[1], 24*VOLUMEPLUSRAND*sizeof(double));
      Fuzz_prop(gauge_field_f, g_spinor_field[3], Nlong);
    }

    /* add new contractions to disc */
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    for(x0=0; x0<T; x0++) {             /* loop on time */
      for(x1=0; x1<VOL3; x1++) {    /* loop on sites in timeslice */
        ix = x0*VOL3 + x1;
        for(mu=0; mu<16; mu++) { /* loop on index of gamma matrix */

          _fv_eq_gamma_ti_fv(spinor1, mu, &g_spinor_field[1][_GSI(ix)]);
  	  _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[2][_GSI(ix)], spinor1);
	  disc[2*(       x0*16+mu)  ] += w.re;
	  disc[2*(       x0*16+mu)+1] += w.im;
     
          _fv_eq_gamma_ti_fv(spinor1, 5, &g_spinor_field[1][_GSI(ix)]);
          _fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
  	  _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[1][_GSI(ix)], spinor2);
	  disc[2*(16*T + x0*16+mu)  ] += w.re;
	  disc[2*(16*T + x0*16+mu)+1] += w.im;
        
          if(Nlong>-1) {
            _fv_eq_gamma_ti_fv(spinor1, mu, &g_spinor_field[3][_GSI(ix)]);
    	    _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[2][_GSI(ix)], spinor1);
	    disc[2*(32*T + x0*16+mu)  ] += w.re;
	    disc[2*(32*T + x0*16+mu)+1] += w.im;
          
            _fv_eq_gamma_ti_fv(spinor1, 5, &g_spinor_field[3][_GSI(ix)]);
            _fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
  	    _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[1][_GSI(ix)], spinor2);
	    disc[2*(48*T + x0*16+mu)  ] += w.re;
	    disc[2*(48*T + x0*16+mu)+1] += w.im;
          }
        }
      }
    }

    if(g_cart_id==0) fprintf(stdout, "# addimag = %25.16e\n", addimag);
    if(g_cart_id==0) fprintf(stdout, "# addreal = %25.16e\n", addreal);
    for(x0=0; x0<T; x0++) {   
      disc[2*(       x0*16+4)  ] += addreal;
      disc[2*(       x0*16+5)+1] -= addimag;
/* 
      if(Nlong>-1) {
        disc[2*(32*T + x0*16+4)  ] += addreal;
        disc[2*(32*T + x0*16+5)+1] -= addimag; 
      }
*/
    }
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "# contractions in %e seconds\n", retime-ratime);

    /* write current disc to file */

    if(g_cart_id==0) {
      if(sid==g_sourceid) fprintf(ofs1, "#%6d%3d%3d%3d%3d\t%f\t%f\n", Nconf, T, LX, LY, LZ, g_kappa, g_mu);
      if(sid==g_sourceid) fprintf(ofs2, "#%6d%3d%3d%3d%3d\t%f\t%f\n", Nconf, T, LX, LY, LZ, g_kappa, g_mu);
      for(x0=0; x0<T; x0++) {
        for(mu=0; mu<16; mu++) {
          idx = gindex[mu];
          ix = 16*x0 + idx;
          if(isimag[mu]==0) {
            fprintf(ofs2, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n",
              Nconf, mu, x0, sid,
              gsign[mu]*disc[2*      ix ]*v4norm, gsign[mu]*disc[2*      ix +1]*v4norm,
              gsign[mu]*disc[2*(32*T+ix)]*v4norm, gsign[mu]*disc[2*(32*T+ix)+1]*v4norm);
          } else {
            fprintf(ofs2, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n",
              Nconf, mu, x0, sid,
              gsign[mu]*disc[2*(     ix)+1]*v4norm, -gsign[mu]*disc[2*      ix ]*v4norm,
              gsign[mu]*disc[2*(32*T+ix)+1]*v4norm, -gsign[mu]*disc[2*(32*T+ix)]*v4norm);
          }
        }
      }
      for(x0=0; x0<T; x0++) {
        for(mu=0; mu<16; mu++) {
          idx = gindex[mu];
          ix = 16*x0 + idx;
          if(isimag[mu]==0) {
            fprintf(ofs1, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n",
              Nconf, mu, x0, sid,
              gsign[mu]*disc[2*(16*T+ix)+1]*vvnorm, -gsign[mu]*disc[2*(16*T+ix)]*vvnorm,
              gsign[mu]*disc[2*(48*T+ix)+1]*vvnorm, -gsign[mu]*disc[2*(48*T+ix)]*vvnorm);
          } else {
            fprintf(ofs1, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n",
              Nconf, mu, x0, sid,
              -gsign[mu]*disc[2*(16*T+ix)]*vvnorm, -gsign[mu]*disc[2*(16*T+ix)+1]*vvnorm,
              -gsign[mu]*disc[2*(48*T+ix)]*vvnorm, -gsign[mu]*disc[2*(48*T+ix)+1]*vvnorm);
          }
        }
      }
#ifdef MPI
      for(c=1; c<g_nproc; c++) {
        MPI_Recv(disc, 128*T, MPI_DOUBLE, c, 100+c, g_cart_grid, &status);
        for(x0=0; x0<T; x0++) {
          for(mu=0; mu<16; mu++) {
            idx=gindex[mu];
            ix = 16*x0 + idx;
            if(isimag[mu]==0) {
              fprintf(ofs2, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n",
                Nconf, mu, c*T+x0, sid,
                gsign[mu]*disc[2*      ix ]*v4norm, gsign[mu]*disc[2*      ix +1]*v4norm,
                gsign[mu]*disc[2*(32*T+ix)]*v4norm, gsign[mu]*disc[2*(32*T+ix)+1]*v4norm);
            } else {
              fprintf(ofs2, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n",
                Nconf, mu, c*T+x0, sid,
                gsign[mu]*disc[2*(     ix)+1]*v4norm, -gsign[mu]*disc[2*      ix ]*v4norm,
                gsign[mu]*disc[2*(32*T+ix)+1]*v4norm, -gsign[mu]*disc[2*(32*T+ix)]*v4norm);
            }
          }
        }
        for(x0=0; x0<T; x0++) {
          for(mu=0; mu<16; mu++) {
            idx = gindex[mu];
            ix = 16*x0 + idx;
            if(isimag[mu]==0) {
              fprintf(ofs1, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n",
                Nconf, mu, c*T+x0, sid,
                gsign[mu]*disc[2*(16*T+ix)+1]*vvnorm, -gsign[mu]*disc[2*(16*T+ix)]*vvnorm,
                gsign[mu]*disc[2*(48*T+ix)+1]*vvnorm, -gsign[mu]*disc[2*(48*T+ix)]*vvnorm);
            } else {
              fprintf(ofs1, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n",
                Nconf, mu, c*T+x0, sid,
                -gsign[mu]*disc[2*(16*T+ix)]*vvnorm, -gsign[mu]*disc[2*(16*T+ix)+1]*vvnorm,
                -gsign[mu]*disc[2*(48*T+ix)]*vvnorm, -gsign[mu]*disc[2*(48*T+ix)+1]*vvnorm);
            }
          }
        }
      }
#endif
    }
#ifdef MPI
    else {
      for(c=1; c<g_nproc; c++) {
        if(g_cart_id==c) {
          MPI_Send(disc, 128*T, MPI_DOUBLE, 0, 100+c, g_cart_grid);
        }
      }
    }
#endif
  }  /* of loop on sid */

  if(g_cart_id==0) { fclose(ofs1); fclose(ofs2); }

  if(g_cart_id==0) {
    fprintf(stdout, "# contributions from HPE:\n");
    fprintf(stdout, "(1) X = id\t%25.16e%25.16e\n"\
                    "          \t%25.16e%25.16e\n"\
    		    "(2) X =  5\t%25.16e%25.16e\n"\
                    "          \t%25.16e%25.16e\n",
		    hopexp_coeff[0], hopexp_coeff[1], hopexp_coeff[2], hopexp_coeff[3],
		    hopexp_coeff[4], hopexp_coeff[5], hopexp_coeff[6], hopexp_coeff[7]);
  }

  /* free the allocated memory, finalize */
  free(g_gauge_field); g_gauge_field=(double*)NULL;
  for(i=0; i<no_fields; i++) free(g_spinor_field[i]);
  free(g_spinor_field); g_spinor_field=(double**)NULL;
  free_geometry();
  free(disc);
  if(Nlong>-1) free(gauge_field_f);
#ifdef MPI
  MPI_Finalize();
#endif

  return(0);

}
Example #18
0
int main(int argc, char **argv) {
  
  int c, i, mu, nu;
  int count = 0;
  int filename_set = 0;
  int dims[4]      = {0,0,0,0};
  int l_LX_at, l_LXstart_at;
  int x0, x1, x2, x3, ix, iix;
  int sid, status;
  double *disc = (double*)NULL;
  double *data = (double*)NULL;
  double *bias = (double*)NULL;
  double *work = (double*)NULL;
  double q[4], fnorm;
  int verbose = 0;
  char filename[100], contype[200];
  double ratime, retime;
  double plaq; 
  double spinor1[24], spinor2[24], U_[18];
  complex w, w1, *cp1, *cp2, *cp3, *cp4;

  fftw_complex *in=(fftw_complex*)NULL;

#ifdef MPI
  fftwnd_mpi_plan plan_p, plan_m;
#else
  fftwnd_plan plan_p, plan_m;
#endif

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "h?vf:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa <= 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.\n");
    usage();
  }

  if(hpe_order%2==0 && hpe_order>0) {
    if(g_proc_id==0) fprintf(stdout, "HPE order should be odd\n");
    usage();
  }

  fprintf(stdout, "\n**************************************************\n"\
                  "* vp_disc_hpe_stoch_subtract with HPE of order %d\n"\
                  "**************************************************\n\n", hpe_order);

  /*********************************
   * initialize MPI parameters 
   *********************************/
  mpi_init(argc, argv);

  /* initialize fftw */
  dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ;
#ifdef MPI
  plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE);
  plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE);
  fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME);
#else
  plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE);
  plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD,  FFTW_MEASURE | FFTW_IN_PLACE);
  T            = T_global;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
#endif
  fprintf(stdout, "# [%2d] fftw parameters:\n"\
                  "# [%2d] T            = %3d\n"\
		  "# [%2d] Tstart       = %3d\n"\
		  "# [%2d] l_LX_at      = %3d\n"\
		  "# [%2d] l_LXstart_at = %3d\n"\
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

#ifdef MPI
  if(T==0) {
    fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id);
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
    exit(101);
  }
#endif

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(102);
  }

  geometry();

  /************************************************
   * read the gauge field, measure the plaquette 
   ************************************************/
  alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND);
  sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf);
  if(g_cart_id==0) fprintf(stdout, "# reading gauge field from file %s\n", filename);
  read_lime_gauge_field_doubleprec(filename);
  xchange_gauge();

  plaquette(&plaq);
  if(g_cart_id==0) fprintf(stdout, "# measured plaquette value: %25.16e\n", plaq);

  /****************************************
   * allocate memory for the spinor fields
   ****************************************/
  no_fields = 3;
  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND);

  /****************************************
   * allocate memory for the contractions
   ****************************************/
  disc  = (double*)calloc(16*VOLUME, sizeof(double));
  if( disc == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for disc\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(103);
  }

  data = (double*)calloc(16*VOLUME, sizeof(double));
  if( data== (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for data\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(104);
  }
  for(ix=0; ix<16*VOLUME; ix++) data[ix] = 0.;

  work  = (double*)calloc(32*VOLUME, sizeof(double));
  if( work == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for work\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(105);
  }

  bias  = (double*)calloc(32*VOLUME, sizeof(double));
  if( bias == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for bias\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(106);
  }
  for(ix=0; ix<32*VOLUME; ix++) bias[ix] = 0.;

  /****************************************
   * prepare Fourier transformation arrays
   ****************************************/
  in  = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex));
  if(in==(fftw_complex*)NULL) {    
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(107);
  }

  /***********************************************
   * start loop on source id.s 
   ***********************************************/
  for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) {
    for(ix=0; ix<16*VOLUME; ix++) disc[ix] = 0.;

    /* read the new propagator */
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(format==0) {
      sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid);
      if(read_lime_spinor(g_spinor_field[2], filename, 0) != 0) break;
    }
    else if(format==1) {
      sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid);
      if(read_cmi(g_spinor_field[2], filename) != 0) break;
    }
    xchange_field(g_spinor_field[2]);
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "# time to read prop.: %e seconds\n", retime-ratime);

    count++;

    /************************************************
     * calculate the source: apply Q_phi_tbc 
     ************************************************/
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    Q_phi_tbc(g_spinor_field[0], g_spinor_field[2]);
    xchange_field(g_spinor_field[0]); 
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "# time to calculate source: %e seconds\n", retime-ratime);

#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    /************************************************
     * HPE: apply BH to order hpe_order+2 
     ************************************************/
    if(hpe_order>0) {
      BHn(g_spinor_field[1], g_spinor_field[2], hpe_order+2);
    } else {
      memcpy((void*)g_spinor_field[1], (void*)g_spinor_field[2], 24*VOLUMEPLUSRAND*sizeof(double));
    }

    /************************************************
     * add new contractions to (existing) disc
     ************************************************/
    for(mu=0; mu<4; mu++) { 
      iix = _GWI(mu,0,VOLUME);
      for(ix=0; ix<VOLUME; ix++) {    
        _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]);

        _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_mi_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2);
	disc[iix  ] = -0.5 * w.re;
	disc[iix+1] = -0.5 * w.im;
	data[iix  ] -= 0.5 * w.re;
	data[iix+1] -= 0.5 * w.im;

	_fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_pl_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2);
	disc[iix  ] -= 0.5 * w.re;
	disc[iix+1] -= 0.5 * w.im;
	data[iix  ] -= 0.5 * w.re;
	data[iix+1] -= 0.5 * w.im;

	iix += 2;
      }
    }
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "# time to contract cvc: %e seconds\n", retime-ratime);
 
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    for(mu=0; mu<4; mu++) {
      memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_m, in, NULL);
#endif
      memcpy((void*)(disc+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));

      memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_p, in, NULL);
#endif
      memcpy((void*)(disc+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));
    }  /* of mu =0 ,..., 3*/

    for(mu=0; mu<4; mu++) {
    for(nu=0; nu<4; nu++) {
      cp1 = (complex*)(disc+_GWI(mu,     0,VOLUME));
      cp2 = (complex*)(disc+_GWI(4+nu,   0,VOLUME));
      cp3 = (complex*)(bias+_GWI(4*mu+nu,0,VOLUME));
      for(ix=0; ix<VOLUME; ix++) {
        _co_eq_co_ti_co(&w1, cp1, cp2);
        cp3->re += w1.re;
        cp3->im += w1.im;
	cp1++; cp2++; cp3++;
      }
    }}
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "# time for Fourier trafo and adding to bias: %e seconds\n", 
      retime-ratime);
  }  /* of loop on sid */

  /************************************************
   * save results for count == Nsave 
   ************************************************/
  if(count==Nsave) {

    if(g_cart_id == 0) fprintf(stdout, "# save results for count = %d\n", count);

    for(ix=0; ix<16*VOLUME; ix++) disc[ix] = 0.;

    if(hpe_order>0) {
      sprintf(filename, "vp_disc_hpe%.2d_loops_X.%.4d", hpe_order, Nconf);
      if(g_cart_id==0) fprintf(stdout, "# reading loop part from file %s\n", filename);
      if( (status = read_lime_contraction(disc, filename, 4, 0)) != 0 ) {
#ifdef MPI
        MPI_Abort(MPI_COMM_WORLD, 1);
        MPI_Finalize();
#endif
        exit(108);
      }
    }


    /* save the result in position space */
    fnorm = 1. / ( (double)count * g_prop_normsqr );
    if(g_cart_id==0) fprintf(stdout, "# X-fnorm = %e\n", fnorm);
    for(mu=0; mu<4; mu++) {
      for(ix=0; ix<VOLUME; ix++) {
        work[_GWI(mu,ix,VOLUME)  ] = data[_GWI(mu,ix,VOLUME)  ] * fnorm + disc[_GWI(mu,ix,VOLUME)  ];
        work[_GWI(mu,ix,VOLUME)+1] = data[_GWI(mu,ix,VOLUME)+1] * fnorm + disc[_GWI(mu,ix,VOLUME)+1];
      }
    }
    sprintf(filename, "vp_disc_hpe%.2d_subtracted_X.%.4d.%.4d", hpe_order, Nconf, count);
    sprintf(contype, "cvc-disc-hpe-loops-%2d-to-%2d-stoch-subtracted-X", hpe_order, hpe_order+2);
    write_lime_contraction(work, filename, 64, 4, contype, Nconf, count);
/*
    sprintf(filename, "vp_disc_hpe%.2d_subtracted_X.%.4d.%.4d.ascii", hpe_order, Nconf, count);
    write_contraction(work, NULL, filename, 4, 2, 0);
*/

#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    for(mu=0; mu<4; mu++) {
      memcpy((void*)in, (void*)(data+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_m, in, NULL);
#endif
      memcpy((void*)(data+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));

      memcpy((void*)in, (void*)(data+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_p, in, NULL);
#endif
      memcpy((void*)(data+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));
    }  

    fnorm = 1. / ( g_prop_normsqr*g_prop_normsqr * (double)count * (double)(count-1) );
    if(g_cart_id==0) fprintf(stdout, "# P-fnorm for purely stochastic part = %e\n", fnorm);
    for(mu=0; mu<4; mu++) {
    for(nu=0; nu<4; nu++) {
      cp1 = (complex*)(data+_GWI(mu,     0,VOLUME));
      cp2 = (complex*)(data+_GWI(4+nu,   0,VOLUME));
      cp3 = (complex*)(work+_GWI(4*mu+nu,0,VOLUME));
      cp4 = (complex*)(bias+_GWI(4*mu+nu,0,VOLUME)); 
      for(ix=0; ix<VOLUME; ix++) {
        _co_eq_co_ti_co(&w1, cp1, cp2);
        cp3->re = ( w1.re - cp4->re ) * fnorm;
        cp3->im = ( w1.im - cp4->im ) * fnorm;
        cp1++; cp2++; cp3++; cp4++;
      }
    }}
  
    for(mu=0; mu<4; mu++) {
      memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_m, in, NULL);
#endif
      memcpy((void*)(disc+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));

      memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_p, in, NULL);
#endif
      memcpy((void*)(disc+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));
    }
 
    fnorm = 1. / ( g_prop_normsqr * (double)count );
    if(g_cart_id==0) fprintf(stdout, "# P-fnorm for mixed stochastic-loop part = %e\n", fnorm);
    for(mu=0; mu<4; mu++) {
    for(nu=0; nu<4; nu++) {
      cp1 = (complex*)(data + _GWI(mu,     0,VOLUME));
      cp2 = (complex*)(disc + _GWI(4+nu,   0,VOLUME));
      cp3 = (complex*)(work + _GWI(4*mu+nu,0,VOLUME));
      for(ix=0; ix<VOLUME; ix++) {
        _co_eq_co_ti_co(&w1, cp1, cp2);
        cp3->re += w1.re * fnorm;
        cp3->im += w1.im * fnorm;
        cp1++; cp2++; cp3++;
      }

      cp1 = (complex*)(disc + _GWI(mu,     0,VOLUME));
      cp2 = (complex*)(data + _GWI(4+nu,   0,VOLUME));
      cp3 = (complex*)(work + _GWI(4*mu+nu,0,VOLUME));
      for(ix=0; ix<VOLUME; ix++) {
        _co_eq_co_ti_co(&w1, cp1, cp2);
        cp3->re += w1.re * fnorm;
        cp3->im += w1.im * fnorm;
        cp1++; cp2++; cp3++;
      }
    }}

    fnorm = 1. / ( (double)T_global * (double)(LX*LY*LZ) );
    if(g_cart_id==0) fprintf(stdout, "# P-fnorm for final estimator (1/T/V) = %e\n", fnorm);
    for(mu=0; mu<4; mu++) {
    for(nu=0; nu<4; nu++) {
      cp1 = (complex*)(disc + _GWI(mu,     0,VOLUME));
      cp2 = (complex*)(disc + _GWI(4+nu,   0,VOLUME));
      cp3 = (complex*)(work + _GWI(4*mu+nu,0,VOLUME));
      for(x0=0; x0<T; x0++) {
        q[0] = (double)(x0+Tstart) / (double)T_global;
      for(x1=0; x1<LX; x1++) {
        q[1] = (double)x1 / (double)LX;
      for(x2=0; x2<LY; x2++) {
        q[2] = (double)x2 / (double)LY;
      for(x3=0; x3<LZ; x3++) {
        q[3] = (double)x3 / (double)LZ;
        ix = g_ipt[x0][x1][x2][x3];
        w.re = cos(M_PI * ( q[mu] - q[nu] ) );
        w.im = sin(M_PI * ( q[mu] - q[nu] ) );
        _co_eq_co_ti_co(&w1, cp1, cp2);
        cp3->re += w1.re;
        cp3->im += w1.im;
        _co_eq_co_ti_co(&w1, cp3, &w);
        cp3->re = w1.re * fnorm;
        cp3->im = w1.im * fnorm;
        cp1++; cp2++; cp3++;
      }}}}
    }}

    sprintf(filename, "vp_disc_hpe%.2d_subtracted_P.%.4d.%.4d", hpe_order, Nconf, count);
    sprintf(contype, "cvc-disc-hpe-loops-%2d-to-%2d-stoch-subtracted-P", hpe_order, hpe_order+2);
    write_lime_contraction(work, filename, 64, 16, contype, Nconf, count);
/*
    sprintf(filename, "vp_disc_hpe%.2d_subtracted_P.%.4d.%.4d.ascii", hpe_order, Nconf, count);
    write_contraction(work, NULL, filename, 16, 2, 0);
*/
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "# time to save cvc results: %e seconds\n", retime-ratime);
  }  /* of if count == Nsave */

  /***********************************************
   * free the allocated memory, finalize 
   ***********************************************/
  free(g_gauge_field);
  for(i=0; i<no_fields; i++) free(g_spinor_field[i]);
  free(g_spinor_field);
  free_geometry();
  fftw_free(in);
  free(disc);
  free(bias);
  free(data);
  free(work);
#ifdef MPI
  fftwnd_mpi_destroy_plan(plan_p);
  fftwnd_mpi_destroy_plan(plan_m);
  MPI_Finalize();
#else
  fftwnd_destroy_plan(plan_p);
  fftwnd_destroy_plan(plan_m);
#endif
  return(0);
}
Example #19
0
/****************************************
 * Copy the argument of command line option -opt into the
 * fixed-size buffer dst of dst_size bytes.
 * Exits with code 100 if the argument (including its
 * terminating '\0') does not fit, instead of writing
 * past the end of dst.
 ****************************************/
static void copy_option_value(char *dst, size_t dst_size, const char *src, int opt) {
  int len = snprintf(dst, dst_size, "%s", src);
  if(len < 0 || (size_t)len >= dst_size) {
    fprintf(stderr, "Error, argument of option -%c is too long; exit\n", opt);
    exit(100);
  }
}

/****************************************
 * Compare two contraction files.
 *
 * Command line options:
 *   -f <file>  input file for read_input_parser (default "cvc.input")
 *   -N <n>     contraction type ncon; must be > 0
 *   -c <file>  first contraction file
 *   -C <file>  second contraction file
 *   -v         sets the verbose flag (not used further in this function)
 *   -h, -?     call usage()
 *
 * Both files are read with read_lime_contraction into buffers of
 * 2*ncon*VOLUME doubles, interpreted as (re, im) pairs. The average
 * difference and the minimal / maximal absolute difference are
 * printed to stdout, separately for real and imaginary parts.
 *
 * Returns 0 on success; on errors the program exits with a
 * non-zero code (100 - 107).
 ****************************************/
int main(int argc, char **argv) {
  
  int c;
  int ncon=-1;
  int filename_set = 0;
  int ix;
  double *disc  = (double*)NULL;
  double *disc2 = (double*)NULL;
  double adiffre, adiffim, mdiffre, mdiffim, Mdiffre, Mdiffim, hre, him;
  int verbose = 0;
  char filename[200];
  /* start out empty so that a missing -c / -C option can be detected */
  char file1[200] = "";
  char file2[200] = "";


  while ((c = getopt(argc, argv, "h?vf:N:c:C:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'f':
      copy_option_value(filename, sizeof(filename), optarg, c);
      filename_set=1;
      break;
    case 'N':
      ncon = atoi(optarg);
      break;
    case 'c':
      copy_option_value(file1, sizeof(file1), optarg, c);
      break;
    case 'C':
      copy_option_value(file2, sizeof(file2), optarg, c);
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  if(g_cart_id==0) fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);


  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.\n");
    usage();
  }
  /* both contraction files are mandatory; without them there is nothing to compare */
  if(file1[0] == '\0' || file2[0] == '\0') {
    fprintf(stderr, "Error, both contraction files must be given with -c and -C; exit\n");
    exit(107);
  }

  /* initialize MPI parameters */
  mpi_init(argc, argv);

  /* initialize */
  T      = T_global;
  Tstart = 0;
  fprintf(stdout, "# [%2d] parameters:\n"\
                  "# [%2d] T            = %3d\n"\
		  "# [%2d] Tstart       = %3d\n",\
		  g_cart_id, g_cart_id, T, g_cart_id, Tstart);

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
    exit(101);
  }

  geometry();

  /****************************************
   * allocate memory for the contractions
   ****************************************/
  if(ncon<=0) {
    fprintf(stderr, "Error, incompatible contraction type specified; exit\n");
    exit(102);
  } else {
    fprintf(stdout, "# Using contraction type %d\n", ncon);
  }
  /* element count is formed in size_t so that the product is not evaluated in int */
  disc  = (double*)calloc((size_t)2 * (size_t)ncon * (size_t)VOLUME, sizeof(double));
  if( disc  == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for disc\n");
    exit(103);
  }
  disc2 = (double*)calloc((size_t)2 * (size_t)ncon * (size_t)VOLUME, sizeof(double));
  if( disc2 == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for disc2\n");
    exit(104);
  }

  /****************************************
   * read contractions
   ****************************************/
  if( read_lime_contraction(disc,  file1, 64, ncon, 0) != 0 ) {
    fprintf(stderr, "Error, could not read from file %s; exit\n", file1);
    exit(105);
  }
  if( read_lime_contraction(disc2, file2, 64, ncon, 0) != 0 ) {
    fprintf(stderr, "Error, could not read from file %s; exit\n", file2);
    exit(106);
  }

  /****************************************
   * calculate difference
   *   adiff: sum of signed differences, normalized below
   *   mdiff: minimum of |difference|, seeded with the first element
   *   Mdiff: maximum of |difference|
   ****************************************/
  mdiffre = fabs(disc[0] - disc2[0]);
  mdiffim = fabs(disc[1] - disc2[1]);
  Mdiffre = 0.;
  Mdiffim = 0.;
  adiffre = 0.;
  adiffim = 0.;
  for(ix=0; ix<ncon*VOLUME; ix++) {
    adiffre += disc[2*ix  ] - disc2[2*ix  ];
    adiffim += disc[2*ix+1] - disc2[2*ix+1];
    hre = fabs(disc[2*ix  ] - disc2[2*ix  ]);
    him = fabs(disc[2*ix+1] - disc2[2*ix+1]);
    if(hre<mdiffre) mdiffre = hre;
    if(hre>Mdiffre) Mdiffre = hre;
    if(him<mdiffim) mdiffim = him;
    if(him>Mdiffim) Mdiffim = him;
  }
  adiffre /= (double)VOLUME * (double)ncon;
  adiffim /= (double)VOLUME * (double)ncon;

  fprintf(stdout, "# Results for files %s and %s:\n", file1, file2);
  fprintf(stdout, "average difference\t%25.16e\t%25.16e\n", adiffre, adiffim);
  fprintf(stdout, "minimal abs. difference\t%25.16e\t%25.16e\n", mdiffre, mdiffim);
  fprintf(stdout, "maximal abs. difference\t%25.16e\t%25.16e\n", Mdiffre, Mdiffim);

  /***********************************************
   * free the allocated memory, finalize 
   ***********************************************/
  free_geometry();
  free(disc);
  free(disc2);

  return(0);

}
Example #20
0
int main(int argc, char **argv) {
  
  const int n_c=3;
  const int n_s=4;
  const char outfile_prefix[] = "delta_pp_2pt_v3";

  int c, i, icomp;
  int filename_set = 0;
  int append, status;
  int l_LX_at, l_LXstart_at;
  int ix, it, iix, x1,x2,x3;
  int ir, ir2, is;
  int VOL3;
  int do_gt=0;
  int dims[3];
  double *connt=NULL;
  spinor_propagator_type *connq=NULL;
  int verbose = 0;
  int sx0, sx1, sx2, sx3;
  int write_ascii=0;
  int fermion_type = 1;  // Wilson fermion type
  int num_threads=1;
  int pos;
  char filename[200], contype[200], gauge_field_filename[200];
  double ratime, retime;
  //double plaq_m, plaq_r;
  double *work=NULL;
  fermion_propagator_type fp1=NULL, fp2=NULL, fp3=NULL, fp4=NULL, fpaux=NULL, uprop=NULL, dprop=NULL, *stochastic_fp=NULL;
  spinor_propagator_type sp1, sp2;
  double q[3], phase, *gauge_trafo=NULL;
  double *stochastic_source=NULL, *stochastic_prop=NULL;
  complex w, w1;
  size_t items, bytes;
  FILE *ofs;
  int timeslice;
  DML_Checksum ildg_gauge_field_checksum, *spinor_field_checksum=NULL, connq_checksum;
  uint32_t nersc_gauge_field_checksum;

/***********************************************************/
  int *qlatt_id=NULL, *qlatt_count=NULL, **qlatt_rep=NULL, **qlatt_map=NULL, qlatt_nclass=0;
  int use_lattice_momenta = 0;
  double **qlatt_list=NULL;
/***********************************************************/

/***********************************************************/
  int rel_momentum_filename_set = 0, rel_momentum_no=0;
  int **rel_momentum_list=NULL;
  char rel_momentum_filename[200];
/***********************************************************/

/***********************************************************/
  int snk_momentum_no = 1;
  int **snk_momentum_list = NULL;
  int snk_momentum_filename_set = 0;
  char snk_momentum_filename[200];
/***********************************************************/

/*******************************************************************
 * Gamma components for the Delta:
 */
  //const int num_component = 16;
  //int gamma_component[2][16] = { {0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3}, \
  //                               {0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3}};
  //double gamma_component_sign[16] = {1., 1.,-1., 1., 1., 1.,-1., 1.,-1.,-1., 1.,-1., 1., 1.,-1., 1.};
  const int num_component = 4;
  int gamma_component[2][4] = { {0, 1, 2, 3},
                                {0, 1, 2, 3} };
  double gamma_component_sign[4] = {+1.,+1.,+1.,+1.};
/*
 *******************************************************************/
  fftw_complex *in=NULL;
#ifdef MPI
   fftwnd_mpi_plan plan_p;
#else
   fftwnd_plan plan_p;
#endif 

#ifdef MPI
  MPI_Status status;
#endif

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "ah?vgf:t:F:p:P:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'a':
      write_ascii = 1;
      fprintf(stdout, "# [] will write in ascii format\n");
      break;
    case 'F':
      if(strcmp(optarg, "Wilson") == 0) {
        fermion_type = _WILSON_FERMION;
      } else if(strcmp(optarg, "tm") == 0) {
        fermion_type = _TM_FERMION;
      } else {
        fprintf(stderr, "[] Error, unrecognized fermion type\n");
        exit(145);
      }
      fprintf(stdout, "# [] will use fermion type %s ---> no. %d\n", optarg, fermion_type);
      break;
    case 't':
      num_threads = atoi(optarg);
      fprintf(stdout, "# [] number of threads set to %d\n", num_threads);
      break;
    case 's':
      use_lattice_momenta = 1;
      fprintf(stdout, "# [] will use lattice momenta\n");
      break;
    case 'p':
      rel_momentum_filename_set = 1;
      strcpy(rel_momentum_filename, optarg);
      fprintf(stdout, "# [] will use current momentum file %s\n", rel_momentum_filename);
      break;
    case 'P':
      snk_momentum_filename_set = 1;
      strcpy(snk_momentum_filename, optarg);
      fprintf(stdout, "# [] will use nucleon momentum file %s\n", snk_momentum_filename);
      break;
    case 'g':
      do_gt = 1;
      fprintf(stdout, "# [] will perform gauge transform\n");
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

#ifdef OPENMP
  omp_set_num_threads(num_threads);
#endif

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n");
    usage();
  }

  /* initialize MPI parameters */
  mpi_init(argc, argv);

#ifdef OPENMP
  status = fftw_threads_init();
  if(status != 0) {
    fprintf(stderr, "\n[] Error from fftw_init_threads; status was %d\n", status);
    exit(120);
  }
#endif

  /******************************************************
   *
   ******************************************************/
  VOL3 = LX*LY*LZ;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
  fprintf(stdout, "# [%2d] parameters:\n"\
		  "# [%2d] l_LX_at      = %3d\n"\
		  "# [%2d] l_LXstart_at = %3d\n"\
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
    exit(1);
  }

  geometry();

  if(N_Jacobi>0) {

    // alloc the gauge field
    alloc_gauge_field(&g_gauge_field, VOL3);
    switch(g_gauge_file_format) {
      case 0:
        sprintf(gauge_field_filename, "%s.%.4d", gaugefilename_prefix, Nconf);
        break;
      case 1:
        sprintf(gauge_field_filename, "%s.%.5d", gaugefilename_prefix, Nconf);
        break;
    }
  } else {
    g_gauge_field = NULL;
  }


  /*********************************************************************
   * gauge transformation
   *********************************************************************/
  if(do_gt) { init_gauge_trafo(&gauge_trafo, 1.); }

  // determine the source location
  sx0 = g_source_location/(LX*LY*LZ)-Tstart;
  sx1 = (g_source_location%(LX*LY*LZ)) / (LY*LZ);
  sx2 = (g_source_location%(LY*LZ)) / LZ;
  sx3 = (g_source_location%LZ);
//  g_source_time_slice = sx0;
  fprintf(stdout, "# [] source location %d = (%d,%d,%d,%d)\n", g_source_location, sx0, sx1, sx2, sx3);
  source_timeslice = sx0;


  if(!use_lattice_momenta) {
    status = make_qcont_orbits_3d_parity_avg(&qlatt_id, &qlatt_count, &qlatt_list, &qlatt_nclass, &qlatt_rep, &qlatt_map);
  } else {
    status = make_qlatt_orbits_3d_parity_avg(&qlatt_id, &qlatt_count, &qlatt_list, &qlatt_nclass, &qlatt_rep, &qlatt_map);
  }
  if(status != 0) {
    fprintf(stderr, "\n[] Error while creating h4-lists\n");
    exit(4);
  }
  fprintf(stdout, "# [] number of classes = %d\n", qlatt_nclass);


  /***************************************************************************
   * read the relative momenta q to be used
   ***************************************************************************/
/*
  ofs = fopen(rel_momentum_filename, "r");
  if(ofs == NULL) {
    fprintf(stderr, "[] Error, could not open file %s for reading\n", rel_momentum_filename);
    exit(6);
  }
  rel_momentum_no = 0;
  while( fgets(line, 199, ofs) != NULL) {
    if(line[0] != '#') {
      rel_momentum_no++;
    }
  }
  if(rel_momentum_no == 0) {
    fprintf(stderr, "[] Error, number of momenta is zero\n");
    exit(7);
  } else {
    fprintf(stdout, "# [] number of current momenta = %d\n", rel_momentum_no);
  }
  rewind(ofs);
  rel_momentum_list = (int**)malloc(rel_momentum_no * sizeof(int*));
  rel_momentum_list[0] = (int*)malloc(3*rel_momentum_no * sizeof(int));
  for(i=1;i<rel_momentum_no;i++) { rel_momentum_list[i] = rel_momentum_list[i-1] + 3; }
  count=0;
  while( fgets(line, 199, ofs) != NULL) {
    if(line[0] != '#') {
      sscanf(line, "%d%d%d", rel_momentum_list[count], rel_momentum_list[count]+1, rel_momentum_list[count]+2);
      count++;
    }
  }
  fclose(ofs);
  fprintf(stdout, "# [] current momentum list:\n");
  for(i=0;i<rel_momentum_no;i++) {
    fprintf(stdout, "\t%3d%3d%3d%3d\n", i, rel_momentum_list[i][0], rel_momentum_list[i][1], rel_momentum_list[i][2]);
  }
*/

  /***************************************************************************
   * read the nucleon final momenta to be used
   ***************************************************************************/
  ofs = fopen(snk_momentum_filename, "r");
  if(ofs == NULL) {
    fprintf(stderr, "[] Error, could not open file %s for reading\n", snk_momentum_filename);
    exit(6);
  }
  snk_momentum_no = 0;
  while( fgets(line, 199, ofs) != NULL) {
    if(line[0] != '#') {
      snk_momentum_no++;
    }
  }
  if(snk_momentum_no == 0) {
    fprintf(stderr, "[] Error, number of momenta is zero\n");
    exit(7);
  } else {
    fprintf(stdout, "# [] number of nucleon final momenta = %d\n", snk_momentum_no);
  }
  rewind(ofs);
  snk_momentum_list = (int**)malloc(snk_momentum_no * sizeof(int*));
  snk_momentum_list[0] = (int*)malloc(3*snk_momentum_no * sizeof(int));
  for(i=1;i<snk_momentum_no;i++) { snk_momentum_list[i] = snk_momentum_list[i-1] + 3; }
  count=0;
  while( fgets(line, 199, ofs) != NULL) {
    if(line[0] != '#') {
      sscanf(line, "%d%d%d", snk_momentum_list[count], snk_momentum_list[count]+1, snk_momentum_list[count]+2);
      count++;
    }
  }
  fclose(ofs);
  fprintf(stdout, "# [] the nucleon final momentum list:\n");
  for(i=0;i<snk_momentum_no;i++) {
    fprintf(stdout, "\t%3d%3d%3d%3d\n", i, snk_momentum_list[i][0], snk_momentum_list[i][1], snk_momentum_list[i][1], snk_momentum_list[i][2]);
  }



  /***********************************************************
   * allocate memory for the spinor fields
   ***********************************************************/
  g_spinor_field = NULL;
  if(fermion_type == _TM_FERMION) {
    no_fields = 2*n_s*n_c+3;
  } else {
    no_fields =   n_s*n_c+3;
  }
  if(N_Jacobi>0) no_fields++;

  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  for(i=0; i<no_fields-2; i++) alloc_spinor_field(&g_spinor_field[i], VOL3);
  // work
  if(N_Jacobi>0) work = g_spinor_field[no_fields-4];
  // stochastic_fv
  stochastic_fv = g_spinor_field[no_fields-3];
  // stochastic source and propagator
  alloc_spinor_field(&g_spinor_field[no_fields-2], VOLUME);
  stochastic_source = g_spinor_field[no_fields-2];
  alloc_spinor_field(&g_spinor_field[no_fields-1], VOLUME);
  stochastic_prop   = g_spinor_field[no_fields-1];


  spinor_field_checksum = (DML_Checksum*)malloc(no_fields * sizeof(DML_Checksum) );
  if(spinor_field_checksum == NULL ) {
    fprintf(stderr, "[] Error, could not alloc checksums for spinor fields\n");
    exit(73);
  }
  
  /*************************************************
   * allocate memory for the contractions
   *************************************************/
  items = 4* num_component*T;
  bytes = sizeof(double);
  connt = (double*)malloc(items*bytes);
  if(connt == NULL) {
    fprintf(stderr, "\n[] Error, could not alloc connt\n");
    exit(2);
  }
  for(ix=0; ix<items; ix++) connt[ix] = 0.;

  items = num_component * (size_t)VOL3;
  connq = create_sp_field( items );
  if(connq == NULL) {
    fprintf(stderr, "\n[] Error, could not alloc connq\n");
    exit(2);
  }

  items = (size_t)VOL3;
  stochastic_fp = create_sp_field( items );
  if(stochastic_fp== NULL) {
    fprintf(stderr, "\n[] Error, could not alloc stochastic_fp\n");
    exit(22);
  }

  /******************************************************
   * initialize FFTW
   ******************************************************/
  items = g_fv_dim * (size_t)VOL3;
  bytes = sizeof(fftw_complex);
  in  = (fftw_complex*)malloc( items * bytes );
  if(in == NULL) {
    fprintf(stderr, "[] Error, could not malloc in for FFTW\n");
    exit(155);
  }
  dims[0]=LX; dims[1]=LY; dims[2]=LZ;
  //plan_p = fftwnd_create_plan(3, dims, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE);
  plan_p = fftwnd_create_plan_specific(3, dims, FFTW_FORWARD, FFTW_MEASURE, in, g_fv_dim, (fftw_complex*)( stochastic_fv ), g_fv_dim);

  // create the fermion propagator points
  create_fp(&uprop);
  create_fp(&dprop);
  create_fp(&fp1);
  create_fp(&fp2);
  create_fp(&fp3);
  create_fp(&stochastic_fp);
  create_sp(&sp1);
  create_sp(&sp2);


  // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  // !! implement twisting for _TM_FERMION
  // !!
  // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#ifdef OPENMP
#pragma omp parallel for private(ix) shared(stochastic_prop)
#endif
  for(ix=0;ix<VOLUME;ix++) { _fv_eq_zero(stochastic_prop+_GSI(ix)); }

  for(sid=g_sourceid; sid<=g_sourceid2;sid+=g_sourceid_step) {
    switch(g_soruce_type) {
      case 2:  // timeslice source
        sprintf(filename, "%s.%.4d.%.2d.%.5d.inverted", filename_prefix, Nconf, source_timeslice, sid);
        break;
      default:
        fprintf(stderr, "# [] source type %d not implented; exit\n", g_source_type);
        exit(100);
    }
    fprintf(stdout, "# [] trying to read sample up-prop. from file %s\n", filename);
    read_lime_spinor(stochastic_source, filename, 0);
#ifdef OPENMP
#pragma omp parallel for private(ix) shared(stochastic_prop, stochastic_source)
#endif
    for(ix=0;ix<VOLUME;ix++) { _fv_pl_eq_fv(stochastic_prop+_GSI(ix), stochastic_source+_GSI(ix)); }
  }
#ifdef OPENMP
#pragma omp parallel for private(ix) shared(stochastic_prop, stochastic_source)
#endif
  fnorm = 1. / ( (double)(g_sourceid2 - g_sourceid + 1) * g_prop_normsqr );
  for(ix=0;ix<VOLUME;ix++) { _fv_ti_eq_re(stochastic_prop+_GSI(ix), fnorm); }
  //  calculate the source
  if(fermion_type && g_propagator_bc_type == 1) {
    Q_Wilson_phi(stochastic_source, stochastic_prop);
  } else {
    Q_phi_tbc(stochastic_source, stochastic_prop);
  }

  /******************************************************
   * prepare the stochastic fermion field
   ******************************************************/
  // read timeslice of the gauge field
  if( N_Jacobi>0) {
    switch(g_gauge_file_format) {
      case 0:
        status = read_lime_gauge_field_doubleprec_timeslice(g_gauge_field, gauge_field_filename, source_timeslice, &ildg_gauge_field_checksum);
        break;
      case 1:
        status = read_nersc_gauge_field_timeslice(g_gauge_field, gauge_field_filename, source_timeslice, &nersc_gauge_field_checksum);
        break;
    }
    if(status != 0) {
      fprintf(stderr, "[] Error, could not read gauge field\n");
      exit(21);
    }
    for(i=0; i<N_ape; i++) {
#ifdef OPENMP
      status = APE_Smearing_Step_Timeslice_threads(g_gauge_field, alpha_ape);
#else
      status = APE_Smearing_Step_Timeslice(g_gauge_field, alpha_ape);
#endif
    }
  }
  // read timeslice of the 12 up-type propagators and smear them
  //
  // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  // !! implement twisting for _TM_FERMION
  // !!
  // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  for(is=0;is<n_s*n_c;is++) {
    if(fermion_type != _TM_FERMION) {
      sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix, Nconf, sx0, sx1, sx2, sx3, is);
    } else {
      sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix2, Nconf, sx0, sx1, sx2, sx3, is);
    }
    status = read_lime_spinor_timeslice(g_spinor_field[is], source_timeslice, filename, 0, spinor_field_checksum+is);
    if(status != 0) {
      fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename);
      exit(102);
    }
    if(N_Jacobi > 0) {
      fprintf(stdout, "# [] Jacobi smearing propagator no. %d with paramters N_Jacobi=%d, kappa_Jacobi=%f\n",
          is, N_Jacobi, kappa_Jacobi);
      for(c=0; c<N_Jacobi; c++) {
#ifdef OPENMP
        Jacobi_Smearing_Step_one_Timeslice_threads(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi);
#else
        Jacobi_Smearing_Step_one_Timeslice(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi);
#endif
      }
    }
  }
  for(is=0;is<g_fv_dim;is++) {
    for(ix=0;ix<VOL3;ix++) {
      iix = source_timeslice * VOL3 + ix;
      _fv_eq_gamma_ti_fv(spinor1, 5, g_spinor_field[is]+_GSI(iix));
      _co_eq_fv_dagger_ti_fv(&w, stochastic_source+_GSI(ix), spinor1);
      stochastic_fv[_GSI(ix)+2*is  ] = w.re;
      stochastic_fv[_GSI(ix)+2*is+1] = w.im;
    }
  }
  // Fourier transform
  items = g_fv_dim * (size_t)VOL3;
  bytes = sizeof(double);
  memcpy(in, stochastic_fv, items*bytes );
#ifdef OPENMP
  fftwnd_threads(num_threads, plan_p, g_fv_dim, in, g_fv_dim, 1, (fftw_complex*)(stochastic_fv), g_fv_dim, 1);
#else
  fftwnd(plan_p, g_fv_dim, in, g_fv_dim, 1, (fftw_complex*)(stochastic_fv), g_fv_dim, 1);
#endif


  /******************************************************
   * loop on sink momenta (most likely only one: Q=(0,0,0))
   ******************************************************/
  for(imom_snk=0;imom_snk<snk_momentum_no; imom_snk++) {

    // create Phi_tilde
    _fv_eq_zero( spinor2 );
    for(ix=0;ix<LX;ix++) {
    for(iy=0;iy<LY;iy++) {
    for(iz=0;iz<LZ;iz++) {
      iix = timeslice * VOL3 + ix;
      phase = -2.*M_PI*( (ix-sx1) * snk_momentum_list[imom_snk][0] / (double)LX 
                       + (iy-sx2) * snk_momentum_list[imom_snk][1] / (double)LY 
                       + (iz-sx3) * snk_momentum_list[imom_snk][2] / (double)LZ);
      w.re = cos(phase);
      w.im = sin(phase);
      _fv_eq_fv_ti_co(spinor1, stochastic_prop + _GSI(iix), &w);
      _fv_pl_eq_fv(spinor2, spinor);
    }}}
    // create Theta
    for(ir=0;ir<g_fv_dim;ir++) {
    for(is=0;is<g_fv_dim;is++) {
      _co_eq_co_ti_co( &(stochastic_fp[ix][ir][2*is]), &(spinor2[2*ir]), &(stochastic_fv[_GSI(ix)+2*is]) );
    }}

    /******************************************************
     * loop on timeslices
     ******************************************************/
    for(timeslice=0; timeslice<T; timeslice++) {
      append = (int)( timeslice != 0 );

      // read timeslice of the gauge field
      if( N_Jacobi>0) {
        switch(g_gauge_file_format) {
          case 0:
            status = read_lime_gauge_field_doubleprec_timeslice(g_gauge_field, gauge_field_filename, timeslice, &ildg_gauge_field_checksum);
            break;
          case 1:
            status = read_nersc_gauge_field_timeslice(g_gauge_field, gauge_field_filename, timeslice, &nersc_gauge_field_checksum);
            break;
        }
        if(status != 0) {
          fprintf(stderr, "[] Error, could not read gauge field\n");
          exit(21);
        }

        for(i=0; i<N_ape; i++) {
#ifdef OPENMP
          status = APE_Smearing_Step_Timeslice_threads(g_gauge_field, alpha_ape);
#else
          status = APE_Smearing_Step_Timeslice(g_gauge_field, alpha_ape);
#endif
        }

      }

      // read timeslice of the 12 up-type propagators and smear them
      for(is=0;is<n_s*n_c;is++) {
          sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix, Nconf, sx0, sx1, sx2, sx3, is);
          status = read_lime_spinor_timeslice(g_spinor_field[is], timeslice, filename, 0, spinor_field_checksum+is);
          if(status != 0) {
            fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename);
            exit(102);
          }
          if(N_Jacobi > 0) {
            fprintf(stdout, "# [] Jacobi smearing propagator no. %d with paramters N_Jacobi=%d, kappa_Jacobi=%f\n",
                is, N_Jacobi, kappa_Jacobi);
            for(c=0; c<N_Jacobi; c++) {
#ifdef OPENMP
              Jacobi_Smearing_Step_one_Timeslice_threads(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi);
#else
              Jacobi_Smearing_Step_one_Timeslice(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi);
#endif
            }
          }
      }

      if(fermion_type == _TM_FERMION) {
        // read timeslice of the 12 down-type propagators, smear them
        for(is=0;is<n_s*n_c;is++) {
          if(do_gt == 0) {
            sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix2, Nconf, sx0, sx1, sx2, sx3, is);
            status = read_lime_spinor_timeslice(g_spinor_field[n_s*n_c+is], timeslice, filename, 0, spinor_field_checksum+n_s*n_c+is);
            if(status != 0) {
              fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename);
              exit(102);
            }
            if(N_Jacobi > 0) {
              fprintf(stdout, "# [] Jacobi smearing propagator no. %d with paramters N_Jacobi=%d, kappa_Jacobi=%f\n",
                   is, N_Jacobi, kappa_Jacobi);
              for(c=0; c<N_Jacobi; c++) {
#ifdef OPENMP
                Jacobi_Smearing_Step_one_Timeslice_threads(g_gauge_field, g_spinor_field[n_s*n_c+is], work, kappa_Jacobi);
#else
                Jacobi_Smearing_Step_one_Timeslice(g_gauge_field, g_spinor_field[n_s*n_c+is], work, kappa_Jacobi);
#endif
              }
            }
        }
      }

  
      /******************************************************
       * contractions
       ******************************************************/
      for(ix=0;ix<VOL3;ix++) 
      //for(ix=0;ix<1;ix++) 
      {
  
        // assign the propagators
        _assign_fp_point_from_field(uprop, g_spinor_field, ix);
        if(fermion_type==_TM_FERMION) {
          _assign_fp_point_from_field(dprop, g_spinor_field+n_s*n_c, ix);
        } else {
          _fp_eq_fp(dprop, uprop);
        }
        // flavor rotation for twisted mass fermions
        if(fermion_type == _TM_FERMION) {
          _fp_eq_rot_ti_fp(fp1, uprop, +1, fermion_type, fp2);
          _fp_eq_fp_ti_rot(uprop, fp1, +1, fermion_type, fp2);
  //        _fp_eq_rot_ti_fp(fp1, dprop, -1, fermion_type, fp2);
  //        _fp_eq_fp_ti_rot(dprop, fp1, -1, fermion_type, fp2);
        }
  
        // test: print fermion propagator point
        //printf_fp(uprop, stdout);
  
  
        for(icomp=0; icomp<num_component; icomp++) {
  
          _sp_eq_zero( connq[ix*num_component+icomp]);
  
          /******************************************************
           * first contribution
           ******************************************************/
          _fp_eq_zero(fp1);
          _fp_eq_zero(fp2);
          _fp_eq_zero(fp3);
          // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u
          _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop);
          _fp_eq_gamma_ti_fp(fp3, 2, fp1);
          _fp_eq_gamma_ti_fp(fp1, 0, fp3);
  
          // S_u x C Gamma_2 = S_u x g0 g2 Gamma_2
          _fp_eq_fp_ti_gamma(fp2, 0, uprop);
          _fp_eq_fp_ti_gamma(fp3, 2, fp2);
          _fp_eq_fp_ti_gamma(fp2, gamma_component[1][icomp], fp3);
    
          // first part
          // reduce
          _fp_eq_zero(fp3);
          _fp_eq_fp_eps_contract13_fp(fp3, fp1, uprop);
          // reduce to spin propagator
          _sp_eq_zero( sp1 );
          _sp_eq_fp_del_contract23_fp(sp1, fp2, fp3);
          // second part
          // reduce to spin propagator
          _sp_eq_zero( sp2 );
          _sp_eq_fp_del_contract24_fp(sp2, fp2, fp3);
          // add and assign
          _sp_pl_eq_sp(sp1, sp2);
          _sp_eq_sp_ti_re(sp2, sp1, -gamma_component_sign[icomp]);
          _sp_eq_sp( connq[ix*num_component+icomp], sp2);
  
          /******************************************************
           * second contribution
           ******************************************************/
          _fp_eq_zero(fp1);
          _fp_eq_zero(fp2);
          _fp_eq_zero(fp3);
          // first part
          // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u 
          _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop);
          _fp_eq_gamma_ti_fp(fp3, 2, fp1);
          _fp_eq_gamma_ti_fp(fp1, 0, fp3);
          // S_u x C Gamma_2 = S_u g0 g2 Gamma_2 (same S_u as above)
          _fp_eq_fp_ti_gamma(fp2, 0, fp1);
          _fp_eq_fp_ti_gamma(fp3, 2, fp2);
          _fp_eq_fp_ti_gamma(fp1, gamma_component[1][icomp], fp3);
          // reduce
          _fp_eq_zero(fp3);
          _fp_eq_fp_eps_contract13_fp(fp3, fp1, uprop);
          // reduce to spin propagator
          _sp_eq_zero( sp1 );
          _sp_eq_fp_del_contract23_fp(sp1, uprop, fp3);
          // second part
          // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u
          _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop);
          _fp_eq_gamma_ti_fp(fp3, 2, fp1);
          _fp_eq_gamma_ti_fp(fp1, 0, fp3);
          // S_u x C Gamma_2 = S_u g0 g2 Gamma_2
          _fp_eq_fp_ti_gamma(fp2, 0, uprop);
          _fp_eq_fp_ti_gamma(fp3, 2, fp2);
          _fp_eq_fp_ti_gamma(fp2, gamma_component[1][icomp], fp3);
          // reduce
          _fp_eq_zero(fp3);
          _fp_eq_fp_eps_contract13_fp(fp3, fp1, fp2);
          // reduce to spin propagator
          _sp_eq_zero( sp2 );
          _sp_eq_fp_del_contract24_fp(sp2, uprop, fp3);
          // add and assign
          _sp_pl_eq_sp(sp1, sp2);
          _sp_eq_sp_ti_re(sp2, sp1, -gamma_component_sign[icomp]);
          _sp_pl_eq_sp( connq[ix*num_component+icomp], sp2);
  
          /******************************************************
           * third contribution
           ******************************************************/
          _fp_eq_zero(fp1);
          _fp_eq_zero(fp2);
          _fp_eq_zero(fp3);
          // first part
          // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u
          _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop);
          _fp_eq_gamma_ti_fp(fp3, 2, fp1);
          _fp_eq_gamma_ti_fp(fp1, 0, fp3);
          // S_u x C Gamma_2 = S_u g0 g2 Gamma_2
          _fp_eq_fp_ti_gamma(fp2, 0, fp1);
          _fp_eq_fp_ti_gamma(fp3, 2, fp2);
          _fp_eq_fp_ti_gamma(fp1, gamma_component[1][icomp], fp3);
          // reduce
          _fp_eq_zero(fp3);
          _fp_eq_fp_eps_contract13_fp(fp3, fp1, uprop);
          // reduce to spin propagator
          _sp_eq_zero( sp1 );
          _sp_eq_fp_del_contract34_fp(sp1, uprop, fp3);
          // second part
          // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u
          _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop);
          _fp_eq_gamma_ti_fp(fp3, 2, fp1);
          _fp_eq_gamma_ti_fp(fp1, 0, fp3);
          // S_u x C Gamma_2 = S_u g0 g2 Gamma_2
          _fp_eq_fp_ti_gamma(fp2, 0, uprop);
          _fp_eq_fp_ti_gamma(fp3, 2, fp2);
          _fp_eq_fp_ti_gamma(fp2, gamma_component[1][icomp], fp3);
          // reduce
          _fp_eq_zero(fp3);
          _fp_eq_fp_eps_contract13_fp(fp3, fp1, fp2);
          // reduce to spin propagator
          _sp_eq_zero( sp2 );
          _sp_eq_fp_del_contract34_fp(sp2, uprop, fp3);
          // add and assign
          _sp_pl_eq_sp(sp1, sp2);
          _sp_eq_sp_ti_re(sp2, sp1, -gamma_component_sign[icomp]);
          _sp_pl_eq_sp( connq[ix*num_component+icomp], sp2);
  
        }  // of icomp
  
      }    // of ix
  
      /***********************************************
       * finish calculation of connq
       ***********************************************/
      if(g_propagator_bc_type == 0) {
        // multiply with phase factor
        fprintf(stdout, "# [] multiplying timeslice %d with boundary phase factor\n", timeslice);
        ir = (timeslice - sx0 + T_global) % T_global;
        w1.re = cos( 3. * M_PI*(double)ir / (double)T_global );
        w1.im = sin( 3. * M_PI*(double)ir / (double)T_global );
        for(ix=0;ix<num_component*VOL3;ix++) {
          _sp_eq_sp(sp1, connq[ix] );
          _sp_eq_sp_ti_co( connq[ix], sp1, w1);
        }
      } else if (g_propagator_bc_type == 1) {
        // multiply with step function
        if(timeslice < sx0) {
          fprintf(stdout, "# [] multiplying timeslice %d with boundary step function\n", timeslice);
          for(ix=0;ix<num_component*VOL3;ix++) {
            _sp_eq_sp(sp1, connq[ix] );
            _sp_eq_sp_ti_re( connq[ix], sp1, -1.);
          }
        }
      }
    
      if(write_ascii) {
        sprintf(filename, "%s_x.%.4d.t%.2dx%.2dy%.2dz%.2d.ascii", outfile_prefix, Nconf, sx0, sx1, sx2, sx3);
        write_contraction2( connq[0][0], filename, num_component*g_sv_dim*g_sv_dim, VOL3, 1, append);
      }
  
      /******************************************************************
       * Fourier transform
       ******************************************************************/
      items =  2 * num_component * g_sv_dim * g_sv_dim * VOL3;
      bytes = sizeof(double);
  
      memcpy(in, connq[0][0], items * bytes);
      ir = num_component * g_sv_dim * g_sv_dim;
  #ifdef OPENMP
      fftwnd_threads(num_threads, plan_p, ir, in, ir, 1, (fftw_complex*)(connq[0][0]), ir, 1);
  #else
      fftwnd(plan_p, ir, in, ir, 1, (fftw_complex*)(connq[0][0]), ir, 1);
  #endif
  
      // add phase factor from the source location
      iix = 0;
      for(x1=0;x1<LX;x1++) {
        q[0] = (double)x1 / (double)LX;
      for(x2=0;x2<LY;x2++) {
        q[1] = (double)x2 / (double)LY;
      for(x3=0;x3<LZ;x3++) {
        q[2] = (double)x3 / (double)LZ;
        phase = 2. * M_PI * ( q[0]*sx1 + q[1]*sx2 + q[2]*sx3 );
        w1.re = cos(phase);
        w1.im = sin(phase);
  
        for(icomp=0; icomp<num_component; icomp++) {
          _sp_eq_sp(sp1, connq[iix] );
          _sp_eq_sp_ti_co( connq[iix], sp1, w1) ;
          iix++; 
        }
      }}}  // of x3, x2, x1
  
      // write to file
      sprintf(filename, "%s_q.%.4d.t%.2dx%.2dy%.2dz%.2d.Qx%.2dQy%.2dQz%.2d.%.5d", outfile_prefix, Nconf, sx0, sx1, sx2, sx3,
         qlatt_rep[snk_momentum_list[imom_snk]][1],qlatt_rep[snk_momentum_list[imom_snk]][2],qlatt_rep[snk_momentum_list[imom_snk]][3],
         g_sourceid2-g_sourceid+1);
      sprintf(contype, "2-pt. function, (t,q_1,q_2,q_3)-dependent, source_timeslice = %d", sx0);
      write_lime_contraction_timeslice(connq[0][0], filename, 64, num_component*g_sv_dim*g_sv_dim, contype, Nconf, 0, &connq_checksum, timeslice);
  
      if(write_ascii) {
        strcat(filename, ".ascii");
        write_contraction2(connq[0][0],filename, num_component*g_sv_dim*g_sv_dim, VOL3, 1, append);
      }
  
  
      /***********************************************
       * calculate connt
       ***********************************************/
      for(icomp=0;icomp<num_component; icomp++) {
        // fwd
        _sp_eq_sp(sp1, connq[icomp]);
        _sp_eq_gamma_ti_sp(sp2, 0, sp1);
        _sp_pl_eq_sp(sp1, sp2);
        _co_eq_tr_sp(&w, sp1);
        connt[2*(icomp*T + timeslice)  ] = w.re * 0.25;
        connt[2*(icomp*T + timeslice)+1] = w.im * 0.25;
        // bwd
        _sp_eq_sp(sp1, connq[icomp]);
        _sp_eq_gamma_ti_sp(sp2, 0, sp1);
        _sp_mi_eq_sp(sp1, sp2);
        _co_eq_tr_sp(&w, sp1);
        connt[2*(icomp*T+timeslice + num_component*T)  ] = w.re * 0.25;
        connt[2*(icomp*T+timeslice + num_component*T)+1] = w.im * 0.25;
      }
  
    }  // of loop on timeslice

    // write connt
    sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.fw", outfile_prefix, Nconf, sx0, sx1, sx2, sx3);
    ofs = fopen(filename, "w");
    if(ofs == NULL) {
      fprintf(stderr, "[] Error, could not open file %s for writing\n", filename);
      exit(3);
    }
    fprintf(ofs, "#%12.8f%3d%3d%3d%3d%8.4f%6d\n", g_kappa, T_global, LX, LY, LZ, g_mu, Nconf);
  
    for(icomp=0; icomp<num_component; icomp++) {
      ir = sx0;
      fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], 0, connt[2*(icomp*T+ir)], 0., Nconf);
      for(it=1;it<T/2;it++) {
        ir  = ( it + sx0 ) % T_global;
        ir2 = ( (T_global - it) + sx0 ) % T_global;
        fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(icomp*T+ir)], connt[2*(icomp*T+ir2)], Nconf);
      }
      ir = ( it + sx0 ) % T_global;
      fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(icomp*T+ir)], 0., Nconf);
    }
    fclose(ofs);
  
    sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.bw", outfile_prefix, Nconf, sx0, sx1, sx2, sx3);
    ofs = fopen(filename, "w");
    if(ofs == NULL) {
      fprintf(stderr, "[] Error, could not open file %s for writing\n", filename);
      exit(3);
    }
    fprintf(ofs, "#%12.8f%3d%3d%3d%3d%8.4f%6d\n", g_kappa, T_global, LX, LY, LZ, g_mu, Nconf);
  
    for(icomp=0; icomp<num_component; icomp++) {
      ir = sx0;
      fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], 0, connt[2*(num_component*T+icomp*T+ir)], 0., Nconf);
      for(it=1;it<T/2;it++) {
        ir  = ( it + sx0 ) % T_global;
        ir2 = ( (T_global - it) + sx0 ) % T_global;
        fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(num_component*T+icomp*T+ir)], connt[2*(num_component*T+icomp*T+ir2)], Nconf);
      }
      ir = ( it + sx0 ) % T_global;
      fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(num_component*T+icomp*T+ir)], 0., Nconf);
    }
    fclose(ofs);

  }  // of loop on sink momentum ( = Delta^++ momentum, Qvec)

  /***********************************************
   * free the allocated memory, finalize
   ***********************************************/
  free_geometry();
  if(connt!= NULL) free(connt);
  if(connq!= NULL) free(connq);
  if(gauge_trafo != NULL) free(gauge_trafo);

  if(g_spinor_field!=NULL) {
    for(i=0; i<no_fields; i++) free(g_spinor_field[i]);
    free(g_spinor_field); g_spinor_field=(double**)NULL;
  }
  if(spinor_field_checksum !=NULL) free(spinor_field_checksum);
  if(g_gauge_field != NULL) free(g_gauge_field);

  if(snk_momemtum_list != NULL) {
    if(snk_momentum_list[0] != NULL) free(snk_momentum_list[0]);
    free(snk_momentum_list);
  }
  if(rel_momemtum_list != NULL) {
    if(rel_momentum_list[0] != NULL) free(rel_momentum_list[0]);
    free(rel_momentum_list);
  }

  // free the fermion propagator points
  free_fp( &uprop );
  free_fp( &dprop );
  free_fp( &fp1 );
  free_fp( &fp2 );
  free_fp( &fp3 );
  free_sp( &sp1 );
  free_sp( &sp2 );

  free(in);
  fftwnd_destroy_plan(plan_p);

  g_the_time = time(NULL);
  fprintf(stdout, "# [] %s# [] end fo run\n", ctime(&g_the_time));
  fflush(stdout);
  fprintf(stderr, "# [] %s# [] end fo run\n", ctime(&g_the_time));
  fflush(stderr);

#ifdef MPI
  MPI_Finalize();
#endif
  return(0);
}
Example #21
0
int main(int argc, char **argv) {
  
  int c, i, mu, status;
  int ispin, icol, isc;
  int n_c = 3;
  int n_s = 4;
  int count        = 0;
  int filename_set = 0;
  int dims[4]      = {0,0,0,0};
  int grid_size[4];
  int l_LX_at, l_LXstart_at;
  int x0, x1, x2, x3, ix, iix, iy, is, it, i3;
  int sl0, sl1, sl2, sl3, have_source_flag=0;
  int source_proc_coords[4], lsl0, lsl1, lsl2, lsl3;
  int check_residuum = 0;
  unsigned int VOL3, V5;
  int do_gt   = 0;
  int full_orbit = 0;
  int smear_source = 0;
  char filename[200], source_filename[200], source_filename_write[200];
  double ratime, retime;
  double plaq_r=0., plaq_m=0., norm, norm2;
  double spinor1[24];
  double *gauge_qdp[4], *gauge_field_timeslice=NULL, *gauge_field_smeared=NULL;
  double _1_2_kappa, _2_kappa, phase;
  FILE *ofs;
  int mu_trans[4] = {3, 0, 1, 2};
  int threadid, nthreads;
  int timeslice, source_timeslice;
  char rng_file_in[100], rng_file_out[100];
  int *source_momentum=NULL;
  int source_momentum_class = -1;
  int source_momentum_no = 0;
  int source_momentum_runs = 1;
  int imom;
  int num_gpu_on_node=0, rank;
  int source_location_5d_iseven;
  int convert_sign=0;
#ifdef HAVE_QUDA
  int rotate_gamma_basis = 1;
#else
  int rotate_gamma_basis = 0;
#endif
  omp_lock_t *lck = NULL, gen_lck[1];
  int key = 0;


  /****************************************************************************/
  /* for smearing parallel to inversion                                       */
  double *smearing_spinor_field[] = {NULL,NULL};
  int dummy_flag = 0;
  /****************************************************************************/


  /****************************************************************************/
#if (defined HAVE_QUDA) && (defined MULTI_GPU)
  int x_face_size, y_face_size, z_face_size, t_face_size, pad_size;
#endif
  /****************************************************************************/

  /************************************************/
  int qlatt_nclass;
  int *qlatt_id=NULL, *qlatt_count=NULL, **qlatt_rep=NULL, **qlatt_map=NULL;
  double **qlatt_list=NULL;
  /************************************************/

  /************************************************/
  double boundary_condition_factor;
  int boundary_condition_factor_set = 0;
  /************************************************/

//#ifdef MPI       
//  kernelPackT = true;
//#endif

  /***********************************************
   * QUDA parameters
   ***********************************************/
#ifdef HAVE_QUDA
  QudaPrecision cpu_prec         = QUDA_DOUBLE_PRECISION;
  QudaPrecision cuda_prec        = QUDA_DOUBLE_PRECISION;
  QudaPrecision cuda_prec_sloppy = QUDA_SINGLE_PRECISION;

  QudaGaugeParam gauge_param = newQudaGaugeParam();
  QudaInvertParam inv_param = newQudaInvertParam();
#endif

  while ((c = getopt(argc, argv, "soch?vgf:p:b:S:R:")) != -1) {
    switch (c) {
    case 'v':
      g_verbose = 1;
      break;
    case 'g':
      do_gt = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'c':
      check_residuum = 1;
      fprintf(stdout, "# [invert_dw_quda] will check residuum again\n");
      break;
    case 'p':
      n_c = atoi(optarg);
      fprintf(stdout, "# [invert_dw_quda] will use number of colors = %d\n", n_c);
      break;
    case 'o':
      full_orbit = 1;
      fprintf(stdout, "# [invert_dw_quda] will invert for full orbit, if source momentum set\n");
    case 's':
      smear_source = 1;
      fprintf(stdout, "# [invert_dw_quda] will smear the sources if they are read from file\n");
      break;
    case 'b':
      boundary_condition_factor = atof(optarg);
      boundary_condition_factor_set = 1;
      fprintf(stdout, "# [invert_dw_quda] const. boundary condition factor set to %e\n", boundary_condition_factor);
      break;
    case 'S':
      convert_sign = atoi(optarg);
      fprintf(stdout, "# [invert_dw_quda] using convert sign %d\n", convert_sign);
      break;
    case 'R':
      rotate_gamma_basis = atoi(optarg);
      fprintf(stdout, "# [invert_dw_quda] rotate gamma basis %d\n", rotate_gamma_basis);
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  // get the time stamp
  g_the_time = time(NULL);

  /**************************************
   * set the default values, read input
   **************************************/
  if(filename_set==0) strcpy(filename, "cvc.input");
  if(g_proc_id==0) fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);

#ifdef MPI
#ifdef HAVE_QUDA
  grid_size[0] = g_nproc_x;
  grid_size[1] = g_nproc_y;
  grid_size[2] = g_nproc_z;
  grid_size[3] = g_nproc_t;
  fprintf(stdout, "# [] g_nproc = (%d,%d,%d,%d)\n", g_nproc_x, g_nproc_y, g_nproc_z, g_nproc_t);
  initCommsQuda(argc, argv, grid_size, 4);
#else
  MPI_Init(&argc, &argv);
#endif
#endif

#if (defined PARALLELTX) || (defined PARALLELTXY)
  EXIT_WITH_MSG(1, "[] Error, 2-dim./3-dim. MPI-Version not yet implemented");
#endif


  // some checks on the input data
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stderr, "[invert_dw_quda] Error, T and L's must be set\n");
    usage();
  }

  // set number of openmp threads

  // initialize MPI parameters
  mpi_init(argc, argv);
  
  // the volume of a timeslice
  VOL3 = LX*LY*LZ;
  V5   = T*LX*LY*LZ*L5;
  g_kappa5d = 0.5 / (5. + g_m5);
  if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] kappa5d = %e\n", g_kappa5d);

  fprintf(stdout, "# [%2d] parameters:\n"\
                  "# [%2d] T            = %3d\n"\
		  "# [%2d] Tstart       = %3d\n"\
		  "# [%2d] L5           = %3d\n",\
                  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, L5);


#ifdef MPI
  if(T==0) {
    fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id);
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
    exit(2);
  }
#endif

  if(init_geometry() != 0) {
    fprintf(stderr, "[invert_dw_quda] Error from init_geometry\n");
    EXIT(1);
  }
  geometry();

  if( init_geometry_5d() != 0 ) {
    fprintf(stderr, "[invert_dw_quda] Error from init_geometry_5d\n");
    EXIT(2);
  }
  geometry_5d();

  /**************************************
   * initialize the QUDA library
   **************************************/
  if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] initializing quda\n");
#ifdef HAVE_QUDA
  // cudaGetDeviceCount(&num_gpu_on_node);
  if(g_gpu_per_node<0) {
    if(g_cart_id==0) fprintf(stderr, "[] Error, number of GPUs per node not set\n");
    EXIT(106);
  } else {
    num_gpu_on_node = g_gpu_per_node;
  }
#ifdef MPI
  rank = comm_rank();
#else
  rank = 0;
#endif
  g_gpu_device_number = rank % num_gpu_on_node;
  fprintf(stdout, "# [] process %d/%d uses device %d\n", rank, g_cart_id, g_gpu_device_number);

  initQuda(g_gpu_device_number);

#endif
 
  /**************************************
   * prepare the gauge field
   **************************************/
  // read the gauge field from file
  alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND);
  if(strcmp( gaugefilename_prefix, "identity")==0 ) {
    if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Setting up unit gauge field\n");
    for(ix=0;ix<VOLUME; ix++) {
      for(mu=0;mu<4;mu++) {
        _cm_eq_id(g_gauge_field+_GGI(ix,mu));
      }
    }
  } else if(strcmp( gaugefilename_prefix, "random")==0 ) {
    if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Setting up random gauge field with seed = %d\n", g_seed);
    init_rng_state(g_seed, &g_rng_state);
    random_gauge_field(g_gauge_field, 1.);
    plaquette(&plaq_m);
    sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf);
    check_error(write_lime_gauge_field(filename, plaq_m, Nconf, 64), "write_lime_gauge_field", NULL, 12);
  } else {
    if(g_gauge_file_format == 0) {
      // ILDG
      sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf);
      if(g_cart_id==0) fprintf(stdout, "# Reading gauge field from file %s\n", filename);
      status = read_lime_gauge_field_doubleprec(filename);
    } else if(g_gauge_file_format == 1) {
      // NERSC
      sprintf(filename, "%s.%.5d", gaugefilename_prefix, Nconf);
      if(g_cart_id==0) fprintf(stdout, "# Reading gauge field from file %s\n", filename);
      status = read_nersc_gauge_field(g_gauge_field, filename, &plaq_r);
      //status = read_nersc_gauge_field_3x3(g_gauge_field, filename, &plaq_r);

    }
    if(status != 0) {
      fprintf(stderr, "[invert_dw_quda] Error, could not read gauge field");
      EXIT(12);
    }
  }
#ifdef MPI
  xchange_gauge();
#endif

  // measure the plaquette
  plaquette(&plaq_m);
  if(g_cart_id==0) fprintf(stdout, "# Measured plaquette value: %25.16e\n", plaq_m);
  if(g_cart_id==0) fprintf(stdout, "# Read plaquette value    : %25.16e\n", plaq_r);

#ifndef HAVE_QUDA
  if(N_Jacobi>0) {
#endif
    // allocate the smeared / qdp ordered gauge field
    alloc_gauge_field(&gauge_field_smeared, VOLUMEPLUSRAND);
    for(i=0;i<4;i++) {
      gauge_qdp[i] = gauge_field_smeared + i*18*VOLUME;
    }
#ifndef HAVE_QUDA
  }
#endif

#ifdef HAVE_QUDA
  // transcribe the gauge field

  omp_set_num_threads(g_num_threads);
#pragma omp parallel for private(ix,iy,mu)
  for(ix=0;ix<VOLUME;ix++) {
    iy = g_lexic2eot[ix];
    for(mu=0;mu<4;mu++) {
      _cm_eq_cm(gauge_qdp[mu_trans[mu]]+18*iy, g_gauge_field+_GGI(ix,mu));
    }
  }
  // multiply timeslice T-1 with factor of -1 (antiperiodic boundary condition)
  if(g_proc_coords[0]==g_nproc_t-1) {
    if(!boundary_condition_factor_set) boundary_condition_factor = -1.;
    fprintf(stdout, "# [] process %d multiplies gauge-field timeslice T_global-1 with boundary condition factor %e\n", g_cart_id,
      boundary_condition_factor);

  omp_set_num_threads(g_num_threads);
#pragma omp parallel for private(ix,iy)
    for(ix=0;ix<VOL3;ix++) {
      iix = (T-1)*VOL3 + ix;
      iy = g_lexic2eot[iix];
      _cm_ti_eq_re(gauge_qdp[mu_trans[0]]+18*iy, -1.);
    }
  }

  // QUDA precision parameters
  switch(g_cpu_prec) {
    case 0: cpu_prec = QUDA_HALF_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] CPU prec = half\n"); break;
    case 1: cpu_prec = QUDA_SINGLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] CPU prec = single\n"); break;
    case 2: cpu_prec = QUDA_DOUBLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] CPU prec = double\n"); break;
    default: cpu_prec = QUDA_DOUBLE_PRECISION; break;
  }
  switch(g_gpu_prec) {
    case 0: cuda_prec = QUDA_HALF_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU prec = half\n"); break;
    case 1: cuda_prec = QUDA_SINGLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU prec = single\n"); break;
    case 2: cuda_prec = QUDA_DOUBLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU prec = double\n"); break;
    default: cuda_prec = QUDA_DOUBLE_PRECISION; break;
  }
  switch(g_gpu_prec_sloppy) {
    case 0: cuda_prec_sloppy = QUDA_HALF_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU sloppy prec = half\n"); break;
    case 1: cuda_prec_sloppy = QUDA_SINGLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU sloppy prec = single\n"); break;
    case 2: cuda_prec_sloppy = QUDA_DOUBLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU sloppy prec = double\n"); break;
    default: cuda_prec_sloppy = QUDA_SINGLE_PRECISION; break;
  }

  // QUDA gauge parameters
  gauge_param.X[0] = LX;
  gauge_param.X[1] = LY;
  gauge_param.X[2] = LZ;
  gauge_param.X[3] = T;
  inv_param.Ls = L5;

  gauge_param.anisotropy  = 1.0;
  gauge_param.type        = QUDA_WILSON_LINKS;
  gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
  gauge_param.t_boundary  = QUDA_ANTI_PERIODIC_T;

  gauge_param.cpu_prec           = cpu_prec;
  gauge_param.cuda_prec          = cuda_prec;
  gauge_param.reconstruct        = QUDA_RECONSTRUCT_12;
  gauge_param.cuda_prec_sloppy   = cuda_prec_sloppy;
  gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
  gauge_param.gauge_fix          = QUDA_GAUGE_FIXED_NO;

  gauge_param.ga_pad = 0;
  inv_param.sp_pad = 0;
  inv_param.cl_pad = 0;

  // For multi-GPU, ga_pad must be large enough to store a time-slice
#ifdef MULTI_GPU
  x_face_size = inv_param.Ls * gauge_param.X[1]*gauge_param.X[2]*gauge_param.X[3]/2;
  y_face_size = inv_param.Ls * gauge_param.X[0]*gauge_param.X[2]*gauge_param.X[3]/2;
  z_face_size = inv_param.Ls * gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[3]/2;
  t_face_size = inv_param.Ls * gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[2]/2;
  pad_size = _MAX(x_face_size, y_face_size);
  pad_size = _MAX(pad_size, z_face_size);
  pad_size = _MAX(pad_size, t_face_size);
  gauge_param.ga_pad = pad_size;
  if(g_cart_id==0) printf("# [invert_dw_quda] pad_size = %d\n", pad_size);
#endif

  // load the gauge field
  if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] loading gauge field\n");
  loadGaugeQuda((void*)gauge_qdp, &gauge_param);
  gauge_qdp[0] = NULL; 
  gauge_qdp[1] = NULL; 
  gauge_qdp[2] = NULL; 
  gauge_qdp[3] = NULL; 

#endif

  /*********************************************
   * APE smear the gauge field
   *********************************************/
  if(N_Jacobi>0) {
    memcpy(gauge_field_smeared, g_gauge_field, 72*VOLUMEPLUSRAND*sizeof(double));
    fprintf(stdout, "# [invert_dw_quda] APE smearing gauge field with paramters N_APE=%d, alpha_APE=%e\n", N_ape, alpha_ape);
    APE_Smearing_Step_threads(gauge_field_smeared, N_ape, alpha_ape);
    xchange_gauge_field(gauge_field_smeared);
  }

  // allocate memory for the spinor fields
#ifdef HAVE_QUDA
  no_fields = 3+2;
#else
  no_fields = 6+2;
#endif
  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND*L5);
  smearing_spinor_field[0] = g_spinor_field[no_fields-2];
  smearing_spinor_field[1] = g_spinor_field[no_fields-1];

  switch(g_source_type) {
    case 0:
    case 5:
      // the source location
      sl0 =   g_source_location                              / (LX_global*LY_global*LZ);
      sl1 = ( g_source_location % (LX_global*LY_global*LZ) ) / (          LY_global*LZ);
      sl2 = ( g_source_location % (          LY_global*LZ) ) / (                    LZ);
      sl3 =   g_source_location %                      LZ;
      if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] global sl = (%d, %d, %d, %d)\n", sl0, sl1, sl2, sl3);
      source_proc_coords[0] = sl0 / T;
      source_proc_coords[1] = sl1 / LX;
      source_proc_coords[2] = sl2 / LY;
      source_proc_coords[3] = sl3 / LZ;
    #ifdef MPI
      MPI_Cart_rank(g_cart_grid, source_proc_coords, &g_source_proc_id);
    #else
      g_source_proc_id = 0;
    #endif
      have_source_flag = g_source_proc_id == g_cart_id;
    
      lsl0 = sl0 % T;
      lsl1 = sl1 % LX;
      lsl2 = sl2 % LY;
      lsl3 = sl3 % LZ;
      if(have_source_flag) {
        fprintf(stdout, "# [invert_dw_quda] process %d has the source at (%d, %d, %d, %d)\n", g_cart_id, lsl0, lsl1, lsl2, lsl3);
      }
      break;
    case 2:
    case 3:
    case 4:
      // the source timeslice
#ifdef MPI
      source_proc_coords[0] = g_source_timeslice / T;
      source_proc_coords[1] = 0;
      source_proc_coords[2] = 0;
      source_proc_coords[3] = 0;
      MPI_Cart_rank(g_cart_grid, source_proc_coords, &g_source_proc_id);
      have_source_flag = ( g_source_proc_id == g_cart_id );
      source_timeslice = have_source_flag ? g_source_timeslice % T : -1;
#else
      g_source_proc_id = 0;
      have_source_flag = 1;
      source_timeslice = g_source_timeslice;
#endif
      break;
  }

#ifdef HAVE_QUDA
  /*************************************************************
   * QUDA inverter parameters
   *************************************************************/
  inv_param.dslash_type    = QUDA_DOMAIN_WALL_DSLASH;

  if(strcmp(g_inverter_type_name, "cg") == 0) {
    inv_param.inv_type       = QUDA_CG_INVERTER;
    if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] using cg inverter\n"); 
  } else if(strcmp(g_inverter_type_name, "bicgstab") == 0) {
    inv_param.inv_type       = QUDA_BICGSTAB_INVERTER;
    if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] using bicgstab inverter\n");
#ifdef MULTI_GPU    
  } else if(strcmp(g_inverter_type_name, "gcr") == 0) {
    inv_param.inv_type       = QUDA_GCR_INVERTER;
    if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] using gcr inverter\n"); 
#endif
  } else {
    if(g_cart_id==0) fprintf(stderr, "[invert_dw_quda] Error, unrecognized inverter type %s\n", g_inverter_type_name);
    EXIT(123);
  }


  if(inv_param.inv_type == QUDA_CG_INVERTER) {
    inv_param.solution_type = QUDA_MAT_SOLUTION;
    inv_param.solve_type    = QUDA_NORMEQ_PC_SOLVE;
  } else if(inv_param.inv_type == QUDA_BICGSTAB_INVERTER) {
    inv_param.solution_type = QUDA_MAT_SOLUTION;
    inv_param.solve_type    = QUDA_DIRECT_PC_SOLVE;
  } else {
    inv_param.solution_type = QUDA_MATPC_SOLUTION;
    inv_param.solve_type    = QUDA_DIRECT_PC_SOLVE;
  }

  inv_param.m5             = g_m5;
  inv_param.kappa          = 0.5 / (5. + inv_param.m5);
  inv_param.mass           = g_m0;

  inv_param.tol            = solver_precision;
  inv_param.maxiter        = niter_max;
  inv_param.reliable_delta = reliable_delta;

#ifdef MPI
  // domain decomposition preconditioner parameters
  if(inv_param.inv_type == QUDA_GCR_INVERTER) {
    if(g_cart_id == 0) printf("# [] settup DD parameters\n");
    inv_param.gcrNkrylov     = 15;
    inv_param.inv_type_precondition = QUDA_MR_INVERTER;
    inv_param.tol_precondition = 1e-6;
    inv_param.maxiter_precondition = 200;
    inv_param.verbosity_precondition = QUDA_VERBOSE;
    inv_param.prec_precondition = cuda_prec_sloppy;
    inv_param.omega = 0.7;
  }
#endif

  inv_param.matpc_type         = QUDA_MATPC_EVEN_EVEN;
  inv_param.dagger             = QUDA_DAG_NO;
  inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION; //;QUDA_MASS_NORMALIZATION;

  inv_param.cpu_prec         = cpu_prec;
  inv_param.cuda_prec        = cuda_prec;
  inv_param.cuda_prec_sloppy = cuda_prec_sloppy;

  inv_param.verbosity = QUDA_VERBOSE;

  inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
  inv_param.dirac_order = QUDA_DIRAC_ORDER;
#ifdef MPI
  inv_param.preserve_dirac = QUDA_PRESERVE_DIRAC_YES;
  inv_param.prec_precondition = cuda_prec_sloppy;
  inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
  inv_param.dirac_tune = QUDA_TUNE_NO;
#endif
#endif

  /*******************************************
   * write initial rng state to file
   *******************************************/
  if( g_source_type==2 && g_coherent_source==2 ) {
    sprintf(rng_file_out, "%s.0", g_rng_filename);
    status = init_rng_stat_file (g_seed, rng_file_out);
    if( status != 0 ) {
      fprintf(stderr, "[invert_dw_quda] Error, could not write rng status\n");
      EXIT(210);
    }
  } else if( (g_source_type==2 /*&& g_coherent_source==1*/) || g_source_type==3 || g_source_type==4) {
    if( init_rng_state(g_seed, &g_rng_state) != 0 ) {
      fprintf(stderr, "[invert_dw_quda] Error, could initialize rng state\n");
      EXIT(211);
    }
  }

  /*******************************************
   * prepare locks for openmp
   *******************************************/
  nthreads = g_num_threads - 1;
  lck = (omp_lock_t*)malloc(nthreads * sizeof(omp_lock_t));
  if(lck == NULL) {
      EXIT_WITH_MSG(97, "[invert_dw_quda] Error, could not allocate lck\n");
  }
  // init locks
  for(i=0;i<nthreads;i++) {
    omp_init_lock(lck+i);
  }
  omp_init_lock(gen_lck);

  // check the source momenta
  if(g_source_momentum_set) {
    source_momentum = (int*)malloc(3*sizeof(int));

    if(g_source_momentum[0]<0) g_source_momentum[0] += LX_global;
    if(g_source_momentum[1]<0) g_source_momentum[1] += LY_global;
    if(g_source_momentum[2]<0) g_source_momentum[2] += LZ_global;
    fprintf(stdout, "# [invert_dw_quda] using final source momentum ( %d, %d, %d )\n", g_source_momentum[0], g_source_momentum[1], g_source_momentum[2]);


    if(full_orbit) {
      status = make_qcont_orbits_3d_parity_avg( &qlatt_id, &qlatt_count, &qlatt_list, &qlatt_nclass, &qlatt_rep, &qlatt_map);
      if(status != 0) {
        if(g_cart_id==0) fprintf(stderr, "\n[invert_dw_quda] Error while creating O_3-lists\n");
        EXIT(4);
      }
      source_momentum_class = qlatt_id[g_ipt[0][g_source_momentum[0]][g_source_momentum[1]][g_source_momentum[2]]];
      source_momentum_no    = qlatt_count[source_momentum_class];
      source_momentum_runs  = source_momentum_class==0 ? 1 : source_momentum_no + 1;
      if(g_cart_id==0) fprintf(stdout, "# [] source momentum belongs to class %d with %d members, which means %d runs\n",
          source_momentum_class, source_momentum_no, source_momentum_runs);
    }
  }

  if(g_source_type == 5) {
    if(g_seq_source_momentum_set) {
      if(g_seq_source_momentum[0]<0) g_seq_source_momentum[0] += LX_global;
      if(g_seq_source_momentum[1]<0) g_seq_source_momentum[1] += LY_global;
      if(g_seq_source_momentum[2]<0) g_seq_source_momentum[2] += LZ_global;
    } else if(g_source_momentum_set) {
      g_seq_source_momentum[0] = g_source_momentum[0];
      g_seq_source_momentum[1] = g_source_momentum[1];
      g_seq_source_momentum[2] = g_source_momentum[2];
    }
    fprintf(stdout, "# [invert_dw_quda] using final sequential source momentum ( %d, %d, %d )\n",
        g_seq_source_momentum[0], g_seq_source_momentum[1], g_seq_source_momentum[2]);
  }


  /***********************************************
   * loop on spin-color-index
   ***********************************************/
  for(isc=g_source_index[0]; isc<=g_source_index[1]; isc++)
//  for(isc=g_source_index[0]; isc<=g_source_index[0]; isc++)
  {
    ispin = isc / n_c;
    icol  = isc % n_c;

    for(imom=0; imom<source_momentum_runs; imom++) {

      /***********************************************
       * set source momentum
       ***********************************************/
      if(g_source_momentum_set) {
        if(imom == 0) {
          if(full_orbit) {
            source_momentum[0] = 0;
            source_momentum[1] = 0;
            source_momentum[2] = 0;
          } else {
            source_momentum[0] = g_source_momentum[0];
            source_momentum[1] = g_source_momentum[1];
            source_momentum[2] = g_source_momentum[2];
          }
        } else {
          source_momentum[0] = qlatt_map[source_momentum_class][imom-1] / (LY_global*LZ_global);
          source_momentum[1] = ( qlatt_map[source_momentum_class][imom-1] % (LY_global*LZ_global) ) / LZ_global;
          source_momentum[2] = qlatt_map[source_momentum_class][imom-1] % LZ_global;
        }
        if(g_cart_id==0) fprintf(stdout, "# [] run no. %d, source momentum (%d, %d, %d)\n",
            imom, source_momentum[0], source_momentum[1], source_momentum[2]);
      
      }
 
      /***********************************************
       * prepare the souce
       ***********************************************/
      if(g_read_source == 0) {  // create source
        switch(g_source_type) {
          case 0:
            // point source
            if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Creating point source\n");
            for(ix=0;ix<L5*VOLUME;ix++) { _fv_eq_zero(g_spinor_field[0]+ix); }
            if(have_source_flag) {
              if(g_source_momentum_set) {
                phase = 2*M_PI*( source_momentum[0]*sl1/(double)LX_global + source_momentum[1]*sl2/(double)LY_global + source_momentum[2]*sl3/(double)LZ_global );
                g_spinor_field[0][_GSI(g_ipt[lsl0][lsl1][lsl2][lsl3]) + 2*(n_c*ispin+icol)  ] = cos(phase);
                g_spinor_field[0][_GSI(g_ipt[lsl0][lsl1][lsl2][lsl3]) + 2*(n_c*ispin+icol)+1] = sin(phase);
              } else {
                g_spinor_field[0][_GSI(g_ipt[lsl0][lsl1][lsl2][lsl3]) + 2*(n_c*ispin+icol)  ] = 1.;
              }
            }
            if(g_source_momentum_set) {
              sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.qx%.2dqy%.2dqz%.2d",
                  filename_prefix, Nconf, sl0, sl1, sl2, sl3, n_c*ispin+icol, source_momentum[0], source_momentum[1], source_momentum[2]);
            } else {
              sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d", filename_prefix, Nconf, sl0, sl1, sl2, sl3, n_c*ispin+icol);
            }
#ifdef HAVE_QUDA
            // set matpc_tpye
            source_location_5d_iseven = ( (g_iseven[g_ipt[lsl0][lsl1][lsl2][lsl3]] && ispin<n_s/2) || (!g_iseven[g_ipt[lsl0][lsl1][lsl2][lsl3]] && ispin>=n_s/2) ) ? 1 : 0;
            if(source_location_5d_iseven) {
              inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
              if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] matpc type is MATPC_EVEN_EVEN\n");
            } else {
              inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
              if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] matpc type is MATPC_ODD_ODD\n");
            }
#endif
            break;
          case 2:
            // timeslice source
            if(g_coherent_source==1) {
              if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Creating coherent timeslice source\n");
              status = prepare_coherent_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_coherent_source_base, g_coherent_source_delta, VOLUME, g_rng_state, 1);
              if(status != 0) {
                fprintf(stderr, "[invert_dw_quda] Error from prepare source, status was %d\n", status);
#ifdef MPI
                MPI_Abort(MPI_COMM_WORLD, 123);
                MPI_Finalize();
#endif
                exit(123);
              }
              check_error(prepare_coherent_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_coherent_source_base, g_coherent_source_delta, VOLUME, g_rng_state, 1),
                  "prepare_coherent_timeslice_source", NULL, 123);
              timeslice = g_coherent_source_base;
            } else {
              if(g_coherent_source==2) {
                timeslice = (g_coherent_source_base+isc*g_coherent_source_delta)%T_global;
                fprintf(stdout, "# [invert_dw_quda] Creating timeslice source\n");
                check_error(prepare_timeslice_source(g_spinor_field[0], gauge_field_smeared, timeslice, VOLUME, g_rng_state, 1),
                    "prepare_timeslice_source", NULL, 123);
              } else {
                if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Creating timeslice source\n");
                check_error(prepare_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_source_timeslice, VOLUME, g_rng_state, 1),
                    "prepare_timeslice_source", NULL, 124);
                timeslice = g_source_timeslice;
              }
            }
            if(g_source_momentum_set) {
              sprintf(source_filename, "%s.%.4d.%.2d.%.5d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, 
                  timeslice, isc, source_momentum[0], source_momentum[1], source_momentum[2]);
            } else {
              sprintf(source_filename, "%s.%.4d.%.2d.%.5d", filename_prefix, Nconf, timeslice, isc);
            }
            break;
          case 3:
            // timeslice sources for one-end trick (spin dilution)
            fprintf(stdout, "# [invert_dw_quda] Creating timeslice source for one-end-trick\n");
            check_error( prepare_timeslice_source_one_end(g_spinor_field[0], gauge_field_smeared, source_timeslice, source_momentum, isc%n_s, g_rng_state, \
                ( isc%n_s==(n_s-1) && imom==source_momentum_runs-1 )), "prepare_timeslice_source_one_end", NULL, 125 );
            c = N_Jacobi > 0 ? isc%n_s + n_s : isc%n_s;
            if(g_source_momentum_set) {
              sprintf(source_filename, "%s.%.4d.%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, 
                  g_source_timeslice, c, source_momentum[0], source_momentum[1], source_momentum[2]);
            } else {
              sprintf(source_filename, "%s.%.4d.%.2d.%.2d", filename_prefix, Nconf, g_source_timeslice, c);
            }
            break;
          case 4:
            // timeslice sources for one-end trick (spin and color dilution )
            fprintf(stdout, "# [invert_dw_quda] Creating timeslice source for one-end-trick\n");
            check_error(prepare_timeslice_source_one_end_color(g_spinor_field[0], gauge_field_smeared, source_timeslice, source_momentum,\
                isc%(n_s*n_c), g_rng_state, ( isc%(n_s*n_c)==(n_s*n_c-1)  && imom==source_momentum_runs-1 )), "prepare_timeslice_source_one_end_color", NULL, 126);
            c = N_Jacobi > 0 ? isc%(n_s*n_c) + (n_s*n_c) : isc%(n_s*n_c);
            if(g_source_momentum_set) {
              sprintf(source_filename, "%s.%.4d.%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, 
                  g_source_timeslice, c, source_momentum[0], source_momentum[1], source_momentum[2]);
            } else {
              sprintf(source_filename, "%s.%.4d.%.2d.%.2d", filename_prefix, Nconf, g_source_timeslice, c);
            }
            break;
          case 5:
            if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] preparing sequential point source\n");
            check_error( prepare_sequential_point_source (g_spinor_field[0], isc, sl0, g_seq_source_momentum, 
                  smear_source, g_spinor_field[1], gauge_field_smeared), "prepare_sequential_point_source", NULL, 33);
            sprintf(source_filename, "%s.%.4d.t%.2dx%.2d.y%.2d.z%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix2, Nconf,
                sl0, sl1, sl2, sl3, isc, g_source_momentum[0], g_source_momentum[1], g_source_momentum[2]);
            break;
          default:
            fprintf(stderr, "\nError, unrecognized source type\n");
            exit(32);
            break;
        }
      } else { // read source
        switch(g_source_type) {
          case 0:  // point source
            if(g_source_momentum_set) {
              sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.qx%.2dqy%.2dqz%.2d", \
                  filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc, source_momentum[0], source_momentum[1], source_momentum[2]);
            } else  {
              sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d", filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc);
            }
            fprintf(stdout, "# [invert_dw_quda] reading source from file %s\n", source_filename);
            check_error(read_lime_spinor(g_spinor_field[0], source_filename, 0), "read_lime_spinor", NULL, 115);
            break;
          case 2:  // timeslice source
            if(g_source_momentum_set) {
              sprintf(source_filename, "%s.%.4d.%.2d.%.5d.qx%.2dqy%.2dqz%.2d", filename_prefix2, Nconf, g_source_timeslice,
                  isc, source_momentum[0], source_momentum[1], source_momentum[2]);
            } else {
              sprintf(source_filename, "%s.%.4d.%.2d.%.5d", filename_prefix2, Nconf, g_source_timeslice, isc);
            }
            fprintf(stdout, "# [invert_dw_quda] reading source from file %s\n", source_filename);
            check_error(read_lime_spinor(g_spinor_field[0], source_filename, 0), "read_lime_spinor", NULL, 115);
            break;
          default:
            check_error(1, "source type", NULL, 104);
            break;
          case -1:  // timeslice source
            sprintf(source_filename, "%s", filename_prefix2);
            fprintf(stdout, "# [invert_dw_quda] reading source from file %s\n", source_filename);
            check_error(read_lime_spinor(g_spinor_field[0], source_filename, 0), "read_lime_spinor", NULL, 115);
            break;
        }
      }  // of if g_read_source
  
      if(g_write_source) {
        check_error(write_propagator(g_spinor_field[0], source_filename, 0, g_propagator_precision), "write_propagator", NULL, 27);
      }

/***********************************************************************************************
 * here threads split: 
 ***********************************************************************************************/
      if(dummy_flag==0) strcpy(source_filename_write, source_filename);
      memcpy((void*)(smearing_spinor_field[0]), (void*)(g_spinor_field[0]), 24*VOLUME*sizeof(double));
      if(dummy_flag>0) {
        // copy only if smearing has been done; otherwise do not copy, do not invert
        if(g_cart_id==0) fprintf(stdout, "# [] copy smearing field -> g field\n");
        memcpy((void*)(g_spinor_field[0]), (void*)(smearing_spinor_field[1]), 24*VOLUME*sizeof(double));
      }

      omp_set_num_threads(g_num_threads);
#pragma omp parallel private(threadid, _2_kappa, is, ix, iy, iix, ratime, retime) shared(key,g_read_source, smear_source, N_Jacobi, kappa_Jacobi, smearing_spinor_field, g_spinor_field, nthreads, convert_sign, VOLUME, VOL3, T, L5, isc, rotate_gamma_basis, g_cart_id) firstprivate(inv_param, gauge_param, ofs)
{
      threadid = omp_get_thread_num();

  if(threadid < nthreads) {
      fprintf(stdout, "# [] proc%.2d thread%.2d starting source preparation\n", g_cart_id, threadid);

      // smearing
      if( ( !g_read_source || (g_read_source && smear_source ) ) && N_Jacobi > 0 ) {
        if(g_cart_id==0) fprintf(stdout, "#  [invert_dw_quda] smearing source with N_Jacobi=%d, kappa_Jacobi=%e\n", N_Jacobi, kappa_Jacobi);
        Jacobi_Smearing_threaded(gauge_field_smeared, smearing_spinor_field[0], smearing_spinor_field[1], kappa_Jacobi, N_Jacobi, threadid, nthreads);
      }


      /***********************************************
       * create the 5-dim. source field
       ***********************************************/
      if(convert_sign == 0) {
        spinor_4d_to_5d_threaded(smearing_spinor_field[0], smearing_spinor_field[0], threadid, nthreads);
      }  else if(convert_sign == 1 || convert_sign == -1) {
        spinor_4d_to_5d_sign_threaded(smearing_spinor_field[0], smearing_spinor_field[0], convert_sign, threadid, nthreads);
      }


      for(is=0; is<L5; is++) {
        for(it=threadid; it<T; it+=nthreads) {
          memcpy((void*)(g_spinor_field[0]+_GSI(g_ipt_5d[is][it][0][0][0])), (void*)(smearing_spinor_field[0]+_GSI(g_ipt_5d[is][it][0][0][0])), VOL3*24*sizeof(double));
        }
      }


      // reorder, multiply with g2
      for(is=0; is<L5; is++) {
        for(it=threadid; it<T; it+=nthreads) {
          for(i3=0; i3<VOL3; i3++) {
            ix = (is*T+it)*VOL3 + i3;
            _fv_eq_zero(smearing_spinor_field[1]+_GSI(ix));
      }}} 

      if(rotate_gamma_basis) {
        for(it=threadid; it<T; it+=nthreads) {
          for(i3=0; i3<VOL3; i3++) {
            ix = it * VOL3 + i3;
            iy = lexic2eot_5d(0, ix);
            _fv_eq_gamma_ti_fv(smearing_spinor_field[1]+_GSI(iy), 2, smearing_spinor_field[0]+_GSI(ix));
        }}
        for(it=threadid; it<T; it+=nthreads) {
          for(i3=0; i3<VOL3; i3++) {
            ix = it * VOL3 + i3;
            iy = lexic2eot_5d(L5-1, ix);
            _fv_eq_gamma_ti_fv(smearing_spinor_field[1]+_GSI(iy), 2, smearing_spinor_field[0]+_GSI(ix+(L5-1)*VOLUME));
        }}
      } else {
        for(it=threadid; it<T; it+=nthreads) {
          for(i3=0; i3<VOL3; i3++) {
            ix = it * VOL3 + i3;
            iy = lexic2eot_5d(0, ix);
            _fv_eq_fv(smearing_spinor_field[1]+_GSI(iy), smearing_spinor_field[0]+_GSI(ix));
        }}
        for(it=threadid; it<T; it+=nthreads) {
          for(i3=0; i3<VOL3; i3++) {
            ix = it * VOL3 + i3;
            iy = lexic2eot_5d(L5-1, ix);
            _fv_eq_fv(smearing_spinor_field[1]+_GSI(iy), smearing_spinor_field[0]+_GSI(ix+(L5-1)*VOLUME));
        }}
      }
      fprintf(stdout, "# [] proc%.2d thread%.2d finished source preparation\n", g_cart_id, threadid);

  } else if(threadid == g_num_threads-1 && dummy_flag > 0) {  // else branch on threadid
      fprintf(stdout, "# [] proc%.2d thread%.2d starting inversion for dummy_flag = %d\n", g_cart_id, threadid, dummy_flag);

      /***********************************************
       * perform the inversion
       ***********************************************/
      if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] starting inversion\n");

      xchange_field_5d(g_spinor_field[0]);
      memset(g_spinor_field[1], 0, (VOLUME+RAND)*L5*24*sizeof(double));
      ratime = CLOCK;
#ifdef MPI
      if(inv_param.inv_type == QUDA_BICGSTAB_INVERTER  || inv_param.inv_type == QUDA_GCR_INVERTER) {
        if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling invertQuda\n");
        invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param);
      } else if(inv_param.inv_type == QUDA_CG_INVERTER) {
        if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling testCG\n");
        testCG(g_spinor_field[1], g_spinor_field[0], &inv_param);
      } else {
        if(g_cart_id==0) fprintf(stderr, "# [invert_dw_quda] unrecognized inverter\n");
      }
#else
      invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param);
#endif
      retime = CLOCK;

      if(g_cart_id==0) {
        fprintf(stdout, "# [invert_dw_quda] QUDA time:  %e seconds\n", inv_param.secs);
        fprintf(stdout, "# [invert_dw_quda] QUDA Gflops: %e\n", inv_param.gflops/inv_param.secs);
        fprintf(stdout, "# [invert_dw_quda] wall time:  %e seconds\n", retime-ratime);
        fprintf(stdout, "# [invert_dw_quda] Device memory used:\n\tSpinor: %f GiB\n\tGauge: %f GiB\n",
        inv_param.spinorGiB, gauge_param.gaugeGiB);
      }
  }  // of if threadid

// wait till all threads are here
#pragma omp barrier

      if(inv_param.mass_normalization == QUDA_KAPPA_NORMALIZATION) {
        _2_kappa = 2. * g_kappa5d;
        for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) {
          _fv_ti_eq_re(g_spinor_field[1]+_GSI(ix), _2_kappa );
        }
      }
  
#pragma omp barrier
      // reorder, multiply with g2
      for(is=0;is<L5;is++) {
      for(ix=threadid; ix<VOLUME; ix+=g_num_threads) {
        iy  = lexic2eot_5d(is, ix);
        iix = is*VOLUME + ix;
        _fv_eq_fv(g_spinor_field[0]+_GSI(iix), g_spinor_field[1]+_GSI(iy));
      }}
#pragma omp barrier
      if(rotate_gamma_basis) {
        for(ix=threadid; ix<VOLUME*L5; ix+=g_num_threads) {
          _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(ix), 2, g_spinor_field[0]+_GSI(ix));
        }
      } else {
        for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) {
          _fv_eq_fv(g_spinor_field[1]+_GSI(ix), g_spinor_field[0]+_GSI(ix));
        }
      }
      if(g_cart_id==0 && threadid==g_num_threads-1) fprintf(stdout, "# [invert_dw_quda] inversion done in %e seconds\n", retime-ratime);

#pragma omp single
  {

#ifdef MPI
      xchange_field_5d(g_spinor_field[1]);
#endif
      /***********************************************
       * check residuum
       ***********************************************/
      if(check_residuum && dummy_flag>0) {
        // apply the Wilson Dirac operator in the gamma-basis defined in cvc_linalg,
        //   which uses the tmLQCD conventions (same as in contractions)
        //   without explicit boundary conditions
#ifdef MPI
        xchange_field_5d(g_spinor_field[2]);
        xchange_field_5d(g_spinor_field[1]);
#endif
        memset(g_spinor_field[0], 0, 24*(VOLUME+RAND)*L5*sizeof(double));

        //sprintf(filename, "%s.inverted.ascii.%.2d", source_filename, g_cart_id);
        //ofs = fopen(filename, "w");
        //printf_spinor_field_5d(g_spinor_field[1], ofs);
        //fclose(ofs);

        Q_DW_Wilson_phi(g_spinor_field[0], g_spinor_field[1]);
  
        for(ix=0;ix<VOLUME*L5;ix++) {
          _fv_mi_eq_fv(g_spinor_field[0]+_GSI(ix), g_spinor_field[2]+_GSI(ix));
        }
  
        spinor_scalar_product_re(&norm2, g_spinor_field[2], g_spinor_field[2], VOLUME*L5);
        spinor_scalar_product_re(&norm, g_spinor_field[0], g_spinor_field[0], VOLUME*L5);
        if(g_cart_id==0) fprintf(stdout, "\n# [invert_dw_quda] absolut residuum squared: %e; relative residuum %e\n", norm, sqrt(norm/norm2) );

      }
  
      if(dummy_flag>0) {
        /***********************************************
         * create 4-dim. propagator
         ***********************************************/
        if(convert_sign == 0) {
          spinor_5d_to_4d(g_spinor_field[1], g_spinor_field[1]);
        } else if(convert_sign == -1 || convert_sign == +1) {
          spinor_5d_to_4d_sign(g_spinor_field[1], g_spinor_field[1], convert_sign);
        }
  
        /***********************************************
         * write the solution 
         ***********************************************/
        sprintf(filename, "%s.inverted", source_filename_write);
        if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] writing propagator to file %s\n", filename);
        check_error(write_propagator(g_spinor_field[1], filename, 0, g_propagator_precision), "write_propagator", NULL, 22);
        
        //sprintf(filename, "prop.ascii.4d.%.2d.%.2d.%.2d", isc, g_nproc, g_cart_id);
        //ofs = fopen(filename, "w");
        //printf_spinor_field(g_spinor_field[1], ofs);
        //fclose(ofs);
      }

      if(check_residuum) memcpy(g_spinor_field[2], smearing_spinor_field[0], 24*VOLUME*L5*sizeof(double));

  }  // of omp single

}    // of omp parallel region

      if(dummy_flag > 0) strcpy(source_filename_write, source_filename);

      dummy_flag++;
 
    }  // of loop on momenta

  }  // of isc

#if 0
  // last inversion

  {
      memcpy(g_spinor_field[0], smearing_spinor_field[1], 24*VOLUME*L5*sizeof(double));
      if(g_cart_id==0) fprintf(stdout, "# [] proc%.2d starting last inversion\n", g_cart_id);


      /***********************************************
       * perform the inversion
       ***********************************************/
      if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] starting inversion\n");

      xchange_field_5d(g_spinor_field[0]);
      memset(g_spinor_field[1], 0, (VOLUME+RAND)*L5*24*sizeof(double));
      ratime = CLOCK;
#ifdef MPI
      if(inv_param.inv_type == QUDA_BICGSTAB_INVERTER  || inv_param.inv_type == QUDA_GCR_INVERTER) {
        if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling invertQuda\n");
        invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param);
      } else if(inv_param.inv_type == QUDA_CG_INVERTER) {
        if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling testCG\n");
        testCG(g_spinor_field[1], g_spinor_field[0], &inv_param);
      } else {
        if(g_cart_id==0) fprintf(stderr, "# [invert_dw_quda] unrecognized inverter\n");
      }
#else
      invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param);
#endif
      retime = CLOCK;

      if(g_cart_id==0) {
        fprintf(stdout, "# [invert_dw_quda] QUDA time:  %e seconds\n", inv_param.secs);
        fprintf(stdout, "# [invert_dw_quda] QUDA Gflops: %e\n", inv_param.gflops/inv_param.secs);
        fprintf(stdout, "# [invert_dw_quda] wall time:  %e seconds\n", retime-ratime);
        fprintf(stdout, "# [invert_dw_quda] Device memory used:\n\tSpinor: %f GiB\n\tGauge: %f GiB\n",
        inv_param.spinorGiB, gauge_param.gaugeGiB);
      }

      omp_set_num_threads(g_num_threads);
#pragma omp parallel private(threadid,_2_kappa,is,ix,iy,iix) shared(VOLUME,L5,g_kappa,g_spinor_field,g_num_threads)
    {
      threadid = omp_get_thread_num();

      if(inv_param.mass_normalization == QUDA_KAPPA_NORMALIZATION) {
        _2_kappa = 2. * g_kappa5d;
        for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) {
          _fv_ti_eq_re(g_spinor_field[1]+_GSI(ix), _2_kappa );
        }
      }
#pragma omp barrier
      // reorder, multiply with g2
      for(is=0;is<L5;is++) {
      for(ix=threadid; ix<VOLUME; ix+=g_num_threads) {
        iy  = lexic2eot_5d(is, ix);
        iix = is*VOLUME + ix;
        _fv_eq_fv(g_spinor_field[0]+_GSI(iix), g_spinor_field[1]+_GSI(iy));
      }}
#pragma omp barrier
      if(rotate_gamma_basis) {
        for(ix=threadid; ix<VOLUME*L5; ix+=g_num_threads) {
          _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(ix), 2, g_spinor_field[0]+_GSI(ix));
        }
      } else {
        for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) {
          _fv_eq_fv(g_spinor_field[1]+_GSI(ix), g_spinor_field[0]+_GSI(ix));
        }
      }

    }  // end of parallel region

    if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] inversion done in %e seconds\n", retime-ratime);


#ifdef MPI
      xchange_field_5d(g_spinor_field[1]);
#endif
      /***********************************************
       * check residuum
       ***********************************************/
      if(check_residuum && dummy_flag>0) {
        // apply the Wilson Dirac operator in the gamma-basis defined in cvc_linalg,
        //   which uses the tmLQCD conventions (same as in contractions)
        //   without explicit boundary conditions
#ifdef MPI
        xchange_field_5d(g_spinor_field[2]);
#endif
        memset(g_spinor_field[0], 0, 24*(VOLUME+RAND)*L5*sizeof(double));

        //sprintf(filename, "%s.inverted.ascii.%.2d", source_filename, g_cart_id);
        //ofs = fopen(filename, "w");
        //printf_spinor_field_5d(g_spinor_field[1], ofs);
        //fclose(ofs);


        Q_DW_Wilson_phi(g_spinor_field[0], g_spinor_field[1]);
  
        for(ix=0;ix<VOLUME*L5;ix++) {
          _fv_mi_eq_fv(g_spinor_field[0]+_GSI(ix), g_spinor_field[2]+_GSI(ix));
        }
  
        spinor_scalar_product_re(&norm, g_spinor_field[0], g_spinor_field[0], VOLUME*L5);
        spinor_scalar_product_re(&norm2, g_spinor_field[2], g_spinor_field[2], VOLUME*L5);
        if(g_cart_id==0) fprintf(stdout, "\n# [invert_dw_quda] absolut residuum squared: %e; relative residuum %e\n", norm, sqrt(norm/norm2) );

      }
  
      /***********************************************
       * create 4-dim. propagator
       ***********************************************/
      if(convert_sign == 0) {
        spinor_5d_to_4d(g_spinor_field[1], g_spinor_field[1]);
      } else if(convert_sign == -1 || convert_sign == +1) {
        spinor_5d_to_4d_sign(g_spinor_field[1], g_spinor_field[1], convert_sign);
      }
  
      /***********************************************
       * write the solution 
       ***********************************************/
      sprintf(filename, "%s.inverted", source_filename_write);
      if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] writing propagator to file %s\n", filename);
      check_error(write_propagator(g_spinor_field[1], filename, 0, g_propagator_precision), "write_propagator", NULL, 22);
        
      //sprintf(filename, "prop.ascii.4d.%.2d.%.2d.%.2d", isc, g_nproc, g_cart_id);
      //ofs = fopen(filename, "w");
      //printf_spinor_field(g_spinor_field[1], ofs);
      //fclose(ofs);
  }  // of last inversion

#endif  // of if 0

  /***********************************************
   * free the allocated memory, finalize 
   ***********************************************/

#ifdef HAVE_QUDA
  // finalize the QUDA library
  if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] finalizing quda\n");
#ifdef MPI
  freeGaugeQuda();
#endif
  endQuda();
#endif
  if(g_gauge_field != NULL) free(g_gauge_field);
  if(gauge_field_smeared != NULL) free(gauge_field_smeared);
  if(no_fields>0) {
    if(g_spinor_field!=NULL) {
      for(i=0; i<no_fields; i++) if(g_spinor_field[i]!=NULL) free(g_spinor_field[i]);
      free(g_spinor_field);
    }
  }
  free_geometry();

  if(g_source_momentum_set && full_orbit) {
    finalize_q_orbits(&qlatt_id, &qlatt_count, &qlatt_list, &qlatt_rep);
    if(qlatt_map != NULL) {
      free(qlatt_map[0]);
      free(qlatt_map);
    }
  }
  if(source_momentum != NULL) free(source_momentum);
  if(lck != NULL) free(lck);


#ifdef MPI
#ifdef HAVE_QUDA
  endCommsQuda();
#else
  MPI_Finalize();
#endif
#endif
  if(g_cart_id==0) {
    g_the_time = time(NULL);
    fprintf(stdout, "\n# [invert_dw_quda] %s# [invert_dw_quda] end of run\n", ctime(&g_the_time));
    fprintf(stderr, "\n# [invert_dw_quda] %s# [invert_dw_quda] end of run\n", ctime(&g_the_time));
  }
  return(0);
}
Example #22
0
int main(int argc, char **argv) {

    int c, mu, nu, status;
    int i, j, ncon=-1, ir, is, ic, id;
    int filename_set = 0;
    int x0, x1, x2, x3, ix, iix;
    int y0, y1, y2, y3, iy, iiy;
    int start_valuet=0, start_valuex=0, start_valuey=0;
    int num_threads=1, threadid, nthreads;
    int seed, seed_set=0;
    double diff1, diff2;
    /*  double *chi=NULL, *psi=NULL; */
    double plaq=0., pl_ts, pl_xs, pl_global;
    double *gauge_field_smeared = NULL;
    double s[18], t[18], u[18], pl_loc;
    double spinor1[24], spinor2[24];
    double *pl_gather=NULL;
    double dtmp;
    complex prod, w, w2;
    int verbose = 0;
    char filename[200];
    char file1[200];
    char file2[200];
    FILE *ofs=NULL;
    double norm, norm2;
    fermion_propagator_type *prop=NULL, prop2=NULL, seq_prop=NULL, seq_prop2=NULL, prop_aux=NULL, prop_aux2=NULL;
    int idx, eoflag, shift;
    float *buffer = NULL;
    unsigned int VOL3;
    size_t items, bytes;

#ifdef MPI
    MPI_Init(&argc, &argv);
#endif

    while ((c = getopt(argc, argv, "h?vf:g:")) != -1) {
        switch (c) {
        case 'v':
            verbose = 1;
            break;
        case 'f':
            strcpy(filename, optarg);
            filename_set=1;
            break;
        case 'g':
            strcpy(file1, optarg);
            break;
        case 'h':
        case '?':
        default:
            usage();
            break;
        }
    }

    /* set the default values */
    if(filename_set==0) strcpy(filename, "cvc.input");
    if(g_cart_id==0) fprintf(stdout, "# Reading input from file %s\n", filename);
    read_input_parser(filename);


    /* some checks on the input data */
    if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
        if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
        usage();
    }
    if(g_kappa == 0.) {
        if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n");
        usage();
    }

    /* initialize MPI parameters */
    mpi_init(argc, argv);

    /* initialize T etc. */
    fprintf(stdout, "# [%2d] parameters:\n"\
            "# [%2d] T_global     = %3d\n"\
            "# [%2d] T            = %3d\n"\
            "# [%2d] Tstart       = %3d\n"\
            "# [%2d] LX_global    = %3d\n"\
            "# [%2d] LX           = %3d\n"\
            "# [%2d] LXstart      = %3d\n"\
            "# [%2d] LY_global    = %3d\n"\
            "# [%2d] LY           = %3d\n"\
            "# [%2d] LYstart      = %3d\n",\
            g_cart_id, g_cart_id, T_global, g_cart_id, T, g_cart_id, Tstart,
            g_cart_id, LX_global, g_cart_id, LX, g_cart_id, LXstart,
            g_cart_id, LY_global, g_cart_id, LY, g_cart_id, LYstart);

    if(init_geometry() != 0) {
        fprintf(stderr, "ERROR from init_geometry\n");
        exit(101);
    }
    geometry();

    if(init_geometry_5d() != 0) {
        fprintf(stderr, "ERROR from init_geometry_5d\n");
        exit(102);
    }
    geometry_5d();

    VOL3 = LX*LY*LZ;

    /* read the gauge field */
    alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND);
    if(g_cart_id==0) fprintf(stdout, "# gauge field file name %s\n", file1);

    // status = read_nersc_gauge_field_3x3(g_gauge_field, filename, &plaq);
    // status = read_ildg_nersc_gauge_field(g_gauge_field, filename);
    status = read_lime_gauge_field_doubleprec(file1);
    // status = read_nersc_gauge_field(g_gauge_field, filename, &plaq);
    // status = 0;
    if(status != 0) {
        fprintf(stderr, "[apply_Dtm] Error, could not read gauge field\n");
        EXIT(11);
    }
#ifdef MPI
    xchange_gauge();
#endif

    // measure the plaquette
    if(g_cart_id==0) fprintf(stdout, "# read plaquette value 1st field: %25.16e\n", plaq);
    plaquette(&plaq);
    if(g_cart_id==0) fprintf(stdout, "# measured plaquette value 1st field: %25.16e\n", plaq);


    sprintf(filename, "%s.dbl", file1);
    if(g_cart_id==0) fprintf(stdout, "# [] writing gauge field in double precision to file %s\n", filename);
    status = write_lime_gauge_field(filename, plaq, Nconf, 64);
    if(status != 0) {
        fprintf(stderr, "[apply_Dtm] Error, could not write gauge field\n");
        EXIT(12);
    }

    /***********************************************
     * free the allocated memory, finalize
     ***********************************************/
    free(g_gauge_field);
    free_geometry();

    g_the_time = time(NULL);
    fprintf(stdout, "# [] %s# [] end fo run\n", ctime(&g_the_time));
    fflush(stdout);
    fprintf(stderr, "# [] %s# [] end fo run\n", ctime(&g_the_time));
    fflush(stderr);


#ifdef MPI
    MPI_Finalize();
#endif
    return(0);
}
Example #23
0
File: jc_ud_x.c Project: etmc/cvc
/*
 * jc_ud_x: for every gauge id in [g_gaugeid, g_gaugeid2] and every source id
 * in [g_sourceid, g_sourceid2] (step g_sourceid_step) read a propagator,
 * apply the Wilson operator once, build a two-component position-space
 * contraction per direction mu and lattice site, and write it with
 * write_lime_contraction() to "jc_ud_x.<gid>.<sid>".
 *
 * Command line options:
 *   -f <file>  input parameter file (default: "cvc.input")
 *   -h, -?     call usage()
 *
 * Only the non-MPI build is supported; an MPI build exits immediately.
 * Returns 0 on success; error paths leave through exit().
 */
int main(int argc, char **argv) {

  int c, i, mu, len;
  int filename_set = 0;
  int l_LX_at, l_LXstart_at;
  int ix;
  int sid, status, gid;
  double *disc = NULL;
  double fnorm;
  char filename[100], contype[200];
  double ratime, retime;
  double plaq;
  double spinor1[24], spinor2[24], U_[18];
  complex w;


#ifdef MPI
  fprintf(stderr, "[jc_ud_x] Error, only non-mpi version implemented\n");
  exit(1);
#endif

  while ((c = getopt(argc, argv, "h?f:")) != -1) {
    switch (c) {
    case 'f':
      /* optarg is user-controlled: copy with a bound and reject truncation */
      len = snprintf(filename, sizeof(filename), "%s", optarg);
      if(len < 0 || (size_t)len >= sizeof(filename)) {
        fprintf(stderr, "[jc_ud_x] Error, input file name too long\n");
        exit(4);
      }
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.\n");
    usage();
  }

  fprintf(stdout, "\n**************************************************\n");
  fprintf(stdout, "* jc_ud_x\n");
  fprintf(stdout, "**************************************************\n\n");

  /* single-process layout: the local lattice is the global lattice */
  T            = T_global;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
  fprintf(stdout, "# [%2d] parameters:\n"
                  "# [%2d] T            = %3d\n"
                  "# [%2d] Tstart       = %3d\n"
                  "# [%2d] l_LX_at      = %3d\n"
                  "# [%2d] l_LXstart_at = %3d\n"
                  "# [%2d] FFTW_LOC_VOLUME = %3d\n",
                  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
                  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
    exit(1);
  }

  geometry();

  /*************************************************
   * allocate mem for gauge field and spinor fields
   *************************************************/
  alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND);

  no_fields = 2;
  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  if(g_spinor_field == NULL) {
    /* the pointer array is indexed right below, so it must exist */
    fprintf(stderr, "could not allocate memory for spinor fields\n");
    exit(2);
  }
  for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND);

  /****************************************
   * allocate memory for the contractions
   ****************************************/
  disc  = (double*)calloc( 8*VOLUME, sizeof(double));
  if( disc == (double*)NULL ) {
    fprintf(stderr, "could not allocate memory for disc\n");
    exit(3);
  }

  /***********************************************
   * start loop on gauge id.s
   ***********************************************/
  for(gid=g_gaugeid; gid<=g_gaugeid2; gid++) {

    len = snprintf(filename, sizeof(filename), "%s.%.4d", gaugefilename_prefix, gid);
    if(len < 0 || (size_t)len >= sizeof(filename)) {
      fprintf(stderr, "[jc_ud_x] Error, gauge field file name too long\n");
      exit(5);
    }
    if(g_cart_id==0) fprintf(stdout, "# reading gauge field from file %s\n", filename);
    status = read_lime_gauge_field_doubleprec(filename);
    if(status != 0) {
      /* do not contract with a gauge field that was not (completely) read */
      fprintf(stderr, "[jc_ud_x] Warning, could not read gauge field from file %s; skipping gauge id %d\n", filename, gid);
      continue;
    }
    xchange_gauge();
    plaquette(&plaq);
    if(g_cart_id==0) fprintf(stdout, "# measured plaquette value: %25.16e\n", plaq);

    /***********************************************
     * start loop on source id.s
     ***********************************************/
    for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) {
      /* reset disc to zero */
      for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.;

      /* read the new propagator to g_spinor_field[0];
       * a failed read ends the loop over source ids for this gauge id */
      ratime = (double)clock() / CLOCKS_PER_SEC;
      status = 0;
      if(format==0) {
        len = snprintf(filename, sizeof(filename), "%s.%.4d.%.2d.inverted", filename_prefix, gid, sid);
        if(len < 0 || (size_t)len >= sizeof(filename)) {
          fprintf(stderr, "[jc_ud_x] Error, propagator file name too long\n");
          exit(6);
        }
        status = read_lime_spinor(g_spinor_field[0], filename, 0);
      }
      else if(format==1) {
        len = snprintf(filename, sizeof(filename), "%s.%.4d.%.5d.inverted", filename_prefix, gid, sid);
        if(len < 0 || (size_t)len >= sizeof(filename)) {
          fprintf(stderr, "[jc_ud_x] Error, propagator file name too long\n");
          exit(6);
        }
        status = read_cmi(g_spinor_field[0], filename);
      }
      /* NOTE(review): for any other format value nothing is read and
       * g_spinor_field[0] keeps its previous content - confirm this is intended */
      if(status != 0) break;
      xchange_field(g_spinor_field[0]);
      retime = (double)clock() / CLOCKS_PER_SEC;
      if(g_cart_id==0) fprintf(stdout, "# time to read prop.: %e seconds\n", retime-ratime);

      ratime = (double)clock() / CLOCKS_PER_SEC;

      /* apply D_W once, save in g_spinor_field[1]:
       * field[1] = ( Hopping(field[0]) + field[0] ) / (2 kappa) */
      Hopping(g_spinor_field[1], g_spinor_field[0]);
      for(ix=0; ix<VOLUME; ix++) {
        _fv_pl_eq_fv(g_spinor_field[1]+_GSI(ix), g_spinor_field[0]+_GSI(ix));
        _fv_ti_eq_re(g_spinor_field[1]+_GSI(ix),  1./(2.*g_kappa));
      }
      xchange_field(g_spinor_field[1]);

      retime = (double)clock() / CLOCKS_PER_SEC;
      if(g_cart_id==0) fprintf(stdout, "# time to apply D_W: %e seconds\n", retime-ratime);

      ratime = (double)clock() / CLOCKS_PER_SEC;
      /* two contractions per direction and site; only the imaginary part of
       * each scalar product is kept */
      for(mu=0; mu<4; mu++) {
        for(ix=0; ix<VOLUME; ix++) {
          /* link variable times the boundary phase for direction mu */
          _cm_eq_cm_ti_co(U_, g_gauge_field+_GGI(ix,mu), &(co_phase_up[mu]));

          /* first entry: propagator at the forward neighbour against the
           * propagator at ix, scaled by g_mu */
          _fv_eq_gamma_ti_fv(spinor1, 5, g_spinor_field[0]+_GSI(g_iup[ix][mu]));
          _fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
          _fv_pl_eq_fv(spinor2, spinor1);
          _fv_eq_cm_ti_fv(spinor1, U_, spinor2);
          _co_eq_fv_dag_ti_fv(&w, g_spinor_field[0]+_GSI(ix), spinor1);
          disc[_GWI(mu,ix,VOLUME)  ] = g_mu * w.im;

          /* second entry: same structure with the D_W-applied field at the
           * forward neighbour, scaled by 1/3 */
          _fv_eq_gamma_ti_fv(spinor1, mu, g_spinor_field[1]+_GSI(g_iup[ix][mu]));
          _fv_pl_eq_fv(spinor1, g_spinor_field[1]+_GSI(g_iup[ix][mu]));
          _fv_eq_cm_ti_fv(spinor2, U_, spinor1);
          _co_eq_fv_dag_ti_fv(&w, g_spinor_field[0]+_GSI(ix), spinor2);
          disc[_GWI(mu,ix,VOLUME)+1] = w.im / 3.;
        }
      }
      retime = (double)clock() / CLOCKS_PER_SEC;
      if(g_cart_id==0) fprintf(stdout, "# time to calculate contractions: %e seconds\n", retime-ratime);

      /************************************************
       * save results
       ************************************************/
      if(g_cart_id == 0) fprintf(stdout, "# save results for gauge id %d and sid %d\n", gid, sid);

      /* save the result in position space */
      fnorm = 1. / g_prop_normsqr;
      if(g_cart_id==0) fprintf(stdout, "X-fnorm = %e\n", fnorm);
      for(mu=0; mu<4; mu++) {
        for(ix=0; ix<VOLUME; ix++) {
          disc[_GWI(mu,ix,VOLUME)  ] *= fnorm;
          disc[_GWI(mu,ix,VOLUME)+1] *= fnorm;
        }
      }
      snprintf(filename, sizeof(filename), "jc_ud_x.%.4d.%.4d", gid, sid);
      snprintf(contype, sizeof(contype), "jc-u_and_d-X");
      write_lime_contraction(disc, filename, 64, 4, contype, gid, sid);

    }  /* of loop on sid */
  }  /* of loop on gid */

  /***********************************************
   * free the allocated memory, finalize
   ***********************************************/
  free(g_gauge_field);
  for(i=0; i<no_fields; i++) free(g_spinor_field[i]);
  free(g_spinor_field);
  free_geometry();
  free(disc);

  return(0);

}
Example #24
0
/*
 * Write one time-dependent correlator to a text file.
 *
 * corr holds tsize (re,im) pairs. One line is written for each
 * t = 0 .. tsize/2 with the columns "5 1 t corr_re(t) corr_re(tsize-t) gid";
 * for t = 0 and t = tsize/2 the second value column is written as 0.
 *
 * Returns 0 on success and -1 if the file could not be opened.
 */
static int write_corr_fwd_bwd(const char *fname, const double *corr, int tsize, int gid) {
  FILE *fp = fopen(fname, "w");
  int t;

  if(fp == NULL) return(-1);

  t = 0;
  fprintf(fp, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, t, corr[2*t], 0., gid);
  for(t=1; t<tsize/2; t++) {
    fprintf(fp, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, t, corr[2*t], corr[2*(tsize-t)], gid);
  }
  t = tsize / 2;
  fprintf(fp, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, t, corr[2*t], 0., gid);
  fclose(fp);
  return(0);
}

/*
 * get_corr_v5: for every gauge id in [g_gaugeid, g_gaugeid2] (step
 * g_gauge_step) read a 16-component contraction field, sum component 0
 * ("V0V0") and components 5, 10, 15 ("VKVK") over each time slice,
 * normalize by 1/(LX*LY*LZ) and write the results to "p00_corr.<gid>" and
 * "pkk_corr.<gid>".
 *
 * Command line options:
 *   -f <file>  input parameter file (default: "cvc.input")
 *   -W         additionally write a Ward identity test to "WI.<gid>"
 *   -A         additionally write the diagonal components to "pimm_test.<gid>"
 *   -v         accepted, currently without effect
 *   -h, -?     call usage()
 *
 * A gauge id whose contraction file cannot be read is skipped with a warning.
 * Returns 0 on success; error paths leave through EXIT().
 */
int main(int argc, char **argv) {

  int c, nu, status, gid, len;
  int filename_set = 0;
  int x0, x1, x2, x3, ix, iix;
  double *conn  = NULL;
  double *conn2 = NULL;
  double *conn3 = NULL;
  char filename[200];
  double ratime, retime;
  FILE *ofs;
  double q[4], wre, wim, dtmp;
  int check_WI = 0, write_ascii=0;
  unsigned int VOL3=0;

  while ((c = getopt(argc, argv, "AWh?vf:")) != -1) {
    switch (c) {
    case 'v':
      /* verbosity flag is accepted but nothing in this program reads it */
      break;
    case 'f':
      /* optarg is user-controlled: copy with a bound and reject truncation */
      len = snprintf(filename, sizeof(filename), "%s", optarg);
      if(len < 0 || (size_t)len >= sizeof(filename)) {
        fprintf(stderr, "[get_corr_v5] Error, input file name too long\n");
        EXIT(8);
      }
      filename_set=1;
      break;
    case 'W':
      check_WI = 1;
      fprintf(stdout, "# [get_corr_v5] check Ward Identity\n");
      break;
    case 'A':
      write_ascii = 1;
      fprintf(stdout, "# [get_corr_v5] write Pi_mn in ASCII format\n");
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  g_the_time = time(NULL);

  // set the default values
  set_default_input_values();
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# [get_corr_v5] reading input parameters from file %s\n", filename);
  read_input_parser(filename);

  // some checks on the input data
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    fprintf(stdout, "# [get_corr_v5] T=%d, LX=%d, LY=%d, LZ=%d\n", T_global, LX, LY, LZ);
    if(g_proc_id==0) fprintf(stderr, "[get_corr_v5] Error, T and L's must be set\n");
    usage();
  }

  // initialize MPI parameters
  mpi_init(argc, argv);

  // only formats 0, 2 and 3 have a reader below; any other value would
  // leave the read status undefined
  if(format != 0 && format != 2 && format != 3) {
    fprintf(stderr, "[get_corr_v5] Error, unsupported contraction format; expected 0, 2 or 3\n");
    EXIT(4);
  }

  T            = T_global;
  Tstart       = 0;
  fprintf(stdout, "# [%2d] fftw parameters:\n"
                  "# [%2d] T            = %3d\n"
                  "# [%2d] Tstart       = %3d\n",
                  g_cart_id, g_cart_id, T, g_cart_id, Tstart);

  if(init_geometry() != 0) {
    fprintf(stderr, "[get_corr_v5] Error from init_geometry\n");
    EXIT(1);
  }

  geometry();

  VOL3 = LX*LY*LZ;
  /****************************************
   * allocate memory for the contractions *
   ****************************************/
  conn = (double*)calloc(32 * VOLUME, sizeof(double));
  if( (conn==NULL) ) {
    fprintf(stderr, "[get_corr_v5] Error, could not allocate memory for contr. fields\n");
    EXIT(2);
  }

  conn2= (double*)calloc(2 * T, sizeof(double));
  if( (conn2==NULL) ) {
    fprintf(stderr, "[get_corr_v5] Error, could not allocate memory for corr.\n");
    EXIT(3);
  }

  conn3= (double*)calloc(2 * T, sizeof(double));
  if( (conn3==NULL) ) {
    fprintf(stderr, "[get_corr_v5] Error, could not allocate memory for corr.\n");
    EXIT(3);
  }

  for(gid=g_gaugeid; gid<=g_gaugeid2; gid+=g_gauge_step) {
    memset(conn, 0, 32*VOLUME*sizeof(double));
    /***********************
     * read contractions   *
     ***********************/
    ratime = CLOCK;
    len = snprintf(filename, sizeof(filename), "%s.%.4d", filename_prefix, gid);
    if(len < 0 || (size_t)len >= sizeof(filename)) {
      fprintf(stderr, "[get_corr_v5] Error, contraction file name too long\n");
      EXIT(5);
    }
    if(format==0) {
      status = read_lime_contraction(conn, filename, 16, 0);
    } else {
      // format 2 or 3, checked before the loop
      status = read_contraction(conn, NULL, filename, 16);
    }
    if(status != 0) {
      // a missing or unreadable file only skips this gauge id
      fprintf(stderr, "[get_corr_v5] Warning, could not read contractions for gid %d, status was %d\n", gid, status);
      continue;
    }
    retime = CLOCK;
    fprintf(stdout, "# [get_corr_v5] time to read contractions %e seconds\n", retime-ratime);

    // TEST Pi_mm: dump the four diagonal components 0, 5, 10, 15 per site
    if(write_ascii) {
      sprintf(filename, "pimm_test.%.4d", gid);
      ofs = fopen(filename, "w");
      if(ofs == NULL) {
        fprintf(stderr, "[get_corr_v5] Error, could not open file %s for writing\n", filename);
        EXIT(33);
      }
      fprintf(ofs, "# Pi_mm\n# %s", ctime(&g_the_time));
      for(x0=0; x0<T; x0++) {
      for(x1=0; x1<LX; x1++) {
      for(x2=0; x2<LY; x2++) {
      for(x3=0; x3<LZ; x3++) {
        fprintf(ofs, "# t=%3d x=%3d y=%3d z=%3d\n", x0, x1, x2, x3);
        ix = g_ipt[x0][x1][x2][x3];
        for(nu=0;nu<4;nu++) {
          wre = conn[_GWI(5*nu,ix,VOLUME)];
          wim = conn[_GWI(5*nu,ix,VOLUME)+1];
          fprintf(ofs, "%3d%16.7e%16.7e\n", nu, wre, wim);
        }
      }}}}
      fclose(ofs);
    }  // of if write_ascii

    // TEST Ward Identity: sum_mu q_mu * conn[4*mu+nu] with q_mu = 2 sin(pi x_mu / L_mu)
    if(check_WI) {
      fprintf(stdout, "# [get_corr_v5] Ward identity\n");
      sprintf(filename, "WI.%.4d", gid);
      ofs = fopen(filename, "w");
      if(ofs == NULL) {
        fprintf(stderr, "[get_corr_v5] Error, could not open file %s for writing\n", filename);
        EXIT(32);
      }
      for(x0=0; x0<T; x0++) {
        q[0] = 2. * sin(M_PI * (double)x0 / (double)T);
      for(x1=0; x1<LX; x1++) {
        q[1] = 2. * sin(M_PI * (double)x1 / (double)LX);
      for(x2=0; x2<LY; x2++) {
        q[2] = 2. * sin(M_PI * (double)x2 / (double)LY);
      for(x3=0; x3<LZ; x3++) {
        q[3] = 2. * sin(M_PI * (double)x3 / (double)LZ);
        ix = g_ipt[x0][x1][x2][x3];
        for(nu=0;nu<4;nu++) {
          wre =   q[0] * conn[_GWI(4*0+nu,ix,VOLUME)] + q[1] * conn[_GWI(4*1+nu,ix,VOLUME)]
                + q[2] * conn[_GWI(4*2+nu,ix,VOLUME)] + q[3] * conn[_GWI(4*3+nu,ix,VOLUME)];
          wim =   q[0] * conn[_GWI(4*0+nu,ix,VOLUME)+1] + q[1] * conn[_GWI(4*1+nu,ix,VOLUME)+1]
                + q[2] * conn[_GWI(4*2+nu,ix,VOLUME)+1] + q[3] * conn[_GWI(4*3+nu,ix,VOLUME)+1];
          fprintf(ofs, "\t%3d%3d%3d%3d%3d%16.7e%16.7e\n", nu, x0, x1, x2, x3, wre, wim);
        }
      }}}}
      fclose(ofs);
    }

    /***********************
     * fill the correlator *
     ***********************/
    ratime = CLOCK;
    memset(conn2, 0, 2*T*sizeof(double));
    // (1) V0V0: component 0 summed over each time slice
    for(x0=0; x0<T; x0++) {
      for(ix=0; ix<VOL3; ix++) {
        iix = _GWI(0,x0*VOL3+ix,VOLUME);
        conn2[2*x0  ] += conn[iix  ];
        conn2[2*x0+1] += conn[iix+1];
      }
    }
    // (2) VKVK: components 5, 10, 15 summed over each time slice
    memset(conn3, 0, 2*T*sizeof(double));
    for(x0=0; x0<T; x0++) {
      for(ix=0; ix<VOL3; ix++) {
        iix = x0 * VOL3 + ix;
        conn3[2*x0  ] += conn[_GWI(5,iix,VOLUME)  ] + conn[_GWI(10,iix,VOLUME)  ] + conn[_GWI(15,iix,VOLUME)  ];
        conn3[2*x0+1] += conn[_GWI(5,iix,VOLUME)+1] + conn[_GWI(10,iix,VOLUME)+1] + conn[_GWI(15,iix,VOLUME)+1];
      }
    }

    // normalization
    dtmp = 1. / (double)VOL3;
    for(x0=0; x0<2*T; x0++) { conn2[x0] *= dtmp; }
    for(x0=0; x0<2*T; x0++) { conn3[x0] *= dtmp; }

    retime = CLOCK;
    fprintf(stdout, "# [get_corr_v5] time to fill correlator %e seconds\n", retime-ratime);

    /*****************************************
     * write to file
     *****************************************/
    ratime = CLOCK;
    sprintf(filename, "p00_corr.%.4d", gid);
    if(write_corr_fwd_bwd(filename, conn2, T, gid) != 0) {
      fprintf(stderr, "[get_corr_v5] Error, could not open file %s for writing\n", filename);
      EXIT(6);
    }

    sprintf(filename, "pkk_corr.%.4d", gid);
    if(write_corr_fwd_bwd(filename, conn3, T, gid) != 0) {
      fprintf(stderr, "[get_corr_v5] Error, could not open file %s for writing\n", filename);
      EXIT(7);
    }

    retime = CLOCK;
    fprintf(stdout, "# [get_corr_v5] time to write correlator %e seconds\n", retime-ratime);
  }  // of loop on gid

  /***************************************
   * free the allocated memory, finalize *
   ***************************************/
  free_geometry();
  free(conn);
  free(conn2);
  free(conn3);

  // refresh the time stamp so the message reports the end, not the start, of the run
  g_the_time = time(NULL);
  fprintf(stdout, "# [get_corr_v5] %s# [get_corr_v5] end of run\n", ctime(&g_the_time));
  fflush(stdout);
  fprintf(stderr, "[get_corr_v5] %s[get_corr_v5] end of run\n", ctime(&g_the_time));
  fflush(stderr);

  return(0);

}
Example #25
0
int main(int argc, char **argv) {
  
  int c, i, mu, nu;
  int count        = 0;
  int filename_set = 0;
  int dims[4]      = {0,0,0,0};
  int l_LX_at, l_LXstart_at;
  int x0, x1, x2, x3, ix, iix;
  int dxm[4], dxn[4], ixpm, ixpn;
  int sid;
  double *disc  = (double*)NULL;
  double *work = (double*)NULL;
  double q[4], fnorm;
  int verbose = 0;
  int do_gt   = 0;
  char filename[100];
  double ratime, retime;
  double plaq, _2kappamu, hpe3_coeff, onepmutilde2, mutilde2;
  double spinor1[24], spinor2[24], U_[18], U1_[18], U2_[18];
  double *gauge_trafo=(double*)NULL;
  complex w, w1, w2, *cp1, *cp2, *cp3;
  FILE *ofs;

  fftw_complex *in=(fftw_complex*)NULL;

#ifdef MPI
  fftwnd_mpi_plan plan_p, plan_m;
  int *status;
#else
  fftwnd_plan plan_p, plan_m;
#endif

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "h?vgf:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'g':
      do_gt = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  set_default_input_values();
  if(filename_set==0) strcpy(filename, "cvc.input");

  /* read the input file */
  read_input(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n");
    usage();
  }

  /* initialize MPI parameters */
  mpi_init(argc, argv);
#ifdef MPI
  if((status = (int*)calloc(g_nproc, sizeof(int))) == (int*)NULL) {
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
    exit(7);
  }
#endif

  /* initialize fftw */
  dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ;
#ifdef MPI
  plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE);
  plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE);
  fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME);
#else
  plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE);
  plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD,  FFTW_MEASURE | FFTW_IN_PLACE);
  T            = T_global;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
#endif
  fprintf(stdout, "# [%2d] fftw parameters:\n"\
                  "# [%2d] T            = %3d\n"\
		  "# [%2d] Tstart       = %3d\n"\
		  "# [%2d] l_LX_at      = %3d\n"\
		  "# [%2d] l_LXstart_at = %3d\n"\
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

#ifdef MPI
  if(T==0) {
    fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id);
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
    exit(2);
  }
#endif

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(1);
  }

  geometry();

  /* read the gauge field */
  alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND);
  sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf);
  if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename);
  read_lime_gauge_field_doubleprec(filename);
  xchange_gauge();

  /* measure the plaquette */
  plaquette(&plaq);
  if(g_cart_id==0) fprintf(stdout, "measured plaquette value: %25.16e\n", plaq);

  if(do_gt==1) {
    /***********************************
     * initialize gauge transformation
     ***********************************/
    init_gauge_trafo(&gauge_trafo, 1.);
    apply_gt_gauge(gauge_trafo);
    plaquette(&plaq);
    if(g_cart_id==0) fprintf(stdout, "measured plaquette value after gauge trafo: %25.16e\n", plaq);
  }

  /****************************************
   * allocate memory for the spinor fields
   ****************************************/
  no_fields = 3;
  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND);

  /****************************************
   * allocate memory for the contractions
   ****************************************/
  disc  = (double*)calloc( 8*VOLUME, sizeof(double));
  if( disc == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for disc\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }
  for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.;

  work  = (double*)calloc(48*VOLUME, sizeof(double));
  if( work == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for work\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }

  /****************************************
   * prepare Fourier transformation arrays
   ****************************************/
  in  = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex));
  if(in==(fftw_complex*)NULL) {    
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(4);
  }

  /***********************************************
   * start loop on source id.s 
   ***********************************************/
  for(sid=g_sourceid; sid<=g_sourceid2; sid++) {

    /********************************
     * read the first propagator
     ********************************/
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(format==0) {
      sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid);
      if(read_lime_spinor(g_spinor_field[2], filename, 0) != 0) break;
    }
    else if(format==1) {
      sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid);
      if(read_cmi(g_spinor_field[2], filename) != 0) break;
    }
    xchange_field(g_spinor_field[2]);
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    fprintf(stdout, "time to read prop.: %e seconds\n", retime-ratime);

    if(do_gt==1) {
      /******************************************
       * gauge transform the propagators for sid
       ******************************************/
      for(ix=0; ix<VOLUME; ix++) {
        _fv_eq_cm_ti_fv(spinor1, gauge_trafo+18*ix, g_spinor_field[2]+_GSI(ix));
        _fv_eq_fv(g_spinor_field[2]+_GSI(ix), spinor1);
      }
      xchange_field(g_spinor_field[2]);
    }

    /************************************************
     * calculate the source: apply Q_phi_tbc 
     ************************************************/
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    Q_phi_tbc(g_spinor_field[0], g_spinor_field[2]);
    xchange_field(g_spinor_field[0]); 
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "time to calculate source: %e seconds\n", retime-ratime);


    /************************************************
     * HPE: apply BH5 
     ************************************************/
    BH5(g_spinor_field[1], g_spinor_field[2]);

    for(ix=0; ix<8*VOLUME; ix++) {disc[ix] = 0.;}

    /* add new contractions to (existing) disc */
#  ifdef MPI
    ratime = MPI_Wtime();
#  else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#  endif
    for(mu=0; mu<4; mu++) { /* loop on Lorentz index of the current */
      iix = _GWI(mu,0,VOLUME);
      for(ix=0; ix<VOLUME; ix++) {    /* loop on lattice sites */
        _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]);

        /* first contribution */
        _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_mi_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2);
	disc[iix  ] -= 0.5 * w.re;
	disc[iix+1] -= 0.5 * w.im;

        /* second contribution */
	_fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_pl_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2);
	disc[iix  ] -= 0.5 * w.re;
	disc[iix+1] -= 0.5 * w.im;

	iix += 2;
      }  /* of ix */
    }    /* of mu */

#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "[%2d] time to contract cvc: %e seconds\n", g_cart_id, retime-ratime);

#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif

    /* Fourier transform data, copy to work */
    for(mu=0; mu<4; mu++) {
      memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_p, in, NULL);
#endif
      memcpy((void*)(work+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));
    }

    /********************************
     * read the second propagator
     ********************************/
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(format==0) {
      sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid+g_resume);
      if(read_lime_spinor(g_spinor_field[2], filename, 0) != 0) break;
    }
    else if(format==1) {
      sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid+g_resume);
      if(read_cmi(g_spinor_field[2], filename) != 0) break;
    }
    xchange_field(g_spinor_field[2]);
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    fprintf(stdout, "time to read prop.: %e seconds\n", retime-ratime);

    if(do_gt==1) {
      /******************************************
       * gauge transform the propagators for sid
       ******************************************/
      for(ix=0; ix<VOLUME; ix++) {
        _fv_eq_cm_ti_fv(spinor1, gauge_trafo+18*ix, g_spinor_field[2]+_GSI(ix));
        _fv_eq_fv(g_spinor_field[2]+_GSI(ix), spinor1);
      }
      xchange_field(g_spinor_field[2]);
    }

    /************************************************
     * calculate the source: apply Q_phi_tbc 
     ************************************************/
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    Q_phi_tbc(g_spinor_field[0], g_spinor_field[2]);
    xchange_field(g_spinor_field[0]); 
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "time to calculate source: %e seconds\n", retime-ratime);


    /************************************************
     * HPE: apply BH5 
     ************************************************/
    BH5(g_spinor_field[1], g_spinor_field[2]);

    for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.;

    /* add new contractions to (existing) disc */
#  ifdef MPI
    ratime = MPI_Wtime();
#  else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#  endif
    for(mu=0; mu<4; mu++) { /* loop on Lorentz index of the current */
      iix = _GWI(mu,0,VOLUME);
      for(ix=0; ix<VOLUME; ix++) {    /* loop on lattice sites */
        _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]);

        /* first contribution */
        _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_mi_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2);
	disc[iix  ] -= 0.5 * w.re;
	disc[iix+1] -= 0.5 * w.im;

        /* second contribution */
	_fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]);
	_fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
	_fv_pl_eq_fv(spinor2, spinor1);
	_co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2);
	disc[iix  ] -= 0.5 * w.re;
	disc[iix+1] -= 0.5 * w.im;

	iix += 2;
      }  /* of ix */
    }    /* of mu */

#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "[%2d] time to contract cvc: %e seconds\n", g_cart_id, retime-ratime);

#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif

    /* Fourier transform data, copy to work */
    for(mu=0; mu<4; mu++) {
      memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
      fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
      fftwnd_one(plan_m, in, NULL);
#endif
      memcpy((void*)(work+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));
    }

    fnorm = 1. / ((double)(T_global*LX*LY*LZ));
    fprintf(stdout, "fnorm = %e\n", fnorm);
    for(mu=0; mu<4; mu++) {
    for(nu=0; nu<4; nu++) {
      cp1 = (complex*)(work+_GWI(mu,0,VOLUME));
      cp2 = (complex*)(work+_GWI(4+nu,0,VOLUME));
      cp3 = (complex*)(work+_GWI(8+4*mu+nu,0,VOLUME));
     
      for(x0=0; x0<T; x0++) {
        q[0] = (double)(x0+Tstart) / (double)T_global;
      for(x1=0; x1<LX; x1++) {
        q[1] = (double)(x1) / (double)LX;
      for(x2=0; x2<LY; x2++) {
        q[2] = (double)(x2) / (double)LY;
      for(x3=0; x3<LZ; x3++) {
        q[3] = (double)(x3) / (double)LZ;
        ix = g_ipt[x0][x1][x2][x3];
        w.re = cos( M_PI * (q[mu]-q[nu]) );
	w.im = sin( M_PI * (q[mu]-q[nu]) );
	_co_eq_co_ti_co(&w1, cp1, cp2);
	_co_eq_co_ti_co(cp3, &w1, &w);
	_co_ti_eq_re(cp3, fnorm);
	cp1++; cp2++; cp3++;
      }
      }
      }
      }

    }
    }
  
    /* save the result in momentum space */
    sprintf(filename, "cvc_hpe5_ft.%.4d.%.2d", Nconf, sid);
    write_contraction(work+_GWI(8,0,VOLUME), NULL, filename, 16, 0, 0);

#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "time to save cvc results: %e seconds\n", retime-ratime);

  }  /* of loop on sid */

  /***********************************************
   * free the allocated memory, finalize 
   ***********************************************/
  free(g_gauge_field);
  for(i=0; i<no_fields; i++) free(g_spinor_field[i]);
  free(g_spinor_field);
  free_geometry();
  fftw_free(in);
  free(disc);

  free(work);

#ifdef MPI
  fftwnd_mpi_destroy_plan(plan_p);
  fftwnd_mpi_destroy_plan(plan_m);
  free(status);
  MPI_Finalize();
#else
  fftwnd_destroy_plan(plan_p);
  fftwnd_destroy_plan(plan_m);
#endif

  return(0);

}
Example #26
0
File: jc_ud_tr.c Project: etmc/cvc
/*
 * jc_ud_tr
 *
 * For every gauge id gid in [g_gaugeid, g_gaugeid2]:
 *   - read the 4-component position-space contractions "jc_ud_x.<gid>.<sid>"
 *     for all source ids sid in [g_sourceid, g_sourceid2] (step
 *     g_sourceid_step) into disc1 and sum them up in disc2;
 *   - for every time separation it = 0,...,T/2 and every entry of the
 *     spatial shift table mom_tab (including the listed permutations of the
 *     shift), accumulate
 *         work += disc2(x) * disc2(x-shift)  -  sum_sid disc1(x) * disc1(x-shift)
 *     over all sites x for which x-shift stays inside the lattice (no
 *     periodic wrap-around is applied);
 *   - normalise and write the result as text to the file "jc_ud_tr.<gid>".
 *
 * Options:
 *   -f <file>   input file (default "cvc.input")
 *   -h, -?      call usage()
 *
 * Returns 0 on success; a non-zero value if an output file could not be
 * written. Allocation / geometry failures terminate via exit().
 */
int main(int argc, char **argv) {
  
  int c, i, mu;
  int count        = 0;
  int filename_set = 0;
  int l_LX_at, l_LXstart_at;
  int x0, x1, x2, x3, ix, it, iy;
  int sid1, gid;
  int status = 0;      /* exit code of the program; set != 0 if saving fails */
  int Thp1, Lhp1, nmom, shift[4], shift2[4];
  double *disc1=NULL, *disc2=NULL;
  double *work = NULL;
  double r2, fnorm;
  char filename[100];
  double ratime, retime;
  complex w;
  int *mom_tab=NULL, *mom_members=NULL, *mom_perm=NULL;
  FILE *ofs;

  /* the six permutations of three spatial directions; for a table entry i
   * only the first mom_perm[i] rows are used */
  int perm_tab_3[6][3] = {
    {0, 1, 2},
    {1, 2, 0},
    {2, 0, 1},
    {0, 2, 1},
    {1, 0, 2},
    {2, 1, 0}
  };

  while ((c = getopt(argc, argv, "h?f:")) != -1) {
    switch (c) {
    case 'f':
      /* filename is a fixed-size buffer; refuse arguments that do not fit */
      if(strlen(optarg) >= sizeof(filename)) {
        fprintf(stderr, "input filename too long (max. %d characters)\n", (int)sizeof(filename)-1);
        exit(1);
      }
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.\n");
    usage();
  }

  fprintf(stdout, "\n**************************************************\n");
  fprintf(stdout, "* jc_ud_tr\n");
  fprintf(stdout, "**************************************************\n\n");

  /* initialize */
  T            = T_global;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
  fprintf(stdout, "# [%2d] parameters:\n"\
                  "#       T            = %3d\n"\
                  "#       Tstart       = %3d\n"\
                  "#       l_LX_at      = %3d\n"\
                  "#       l_LXstart_at = %3d\n"\
                  "#       FFTW_LOC_VOLUME = %3d\n", 
                  g_cart_id, T, Tstart, l_LX_at, l_LXstart_at, FFTW_LOC_VOLUME);

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
    exit(1);
  }

  geometry();

  Thp1 = T /2 + 1;
  Lhp1 = LX/2 + 1;
  nmom = 3*Lhp1 - 2;   /* (0,0,0) plus three entries for each x1 = 1,...,LX/2 */

  /****************************************
   * initialize the momenta
   *   table entries: (0,0,0) and, for x1 = 1,...,LX/2,
   *   (x1,0,0), (x1,1,0), (x1,1,1)
   ****************************************/
  mom_tab = (int*)calloc(3*nmom, sizeof(int));
  if( mom_tab==NULL) { 
    fprintf(stderr, "could not allocate memory for mom_tab\n");
    exit(4);
  }
  mom_tab[0] = 0; mom_tab[1] = 0; mom_tab[2] = 0;
  count=3;
  for(x1=1; x1<Lhp1; x1++) {
    mom_tab[count  ] = x1;
    mom_tab[count+1] = 0;
    mom_tab[count+2] = 0;
    mom_tab[count+3] = x1;
    mom_tab[count+4] = 1;
    mom_tab[count+5] = 0;
    mom_tab[count+6] = x1;
    mom_tab[count+7] = 1;
    mom_tab[count+8] = 1;
    count+=9;
  }

  mom_members = (int*)calloc(Thp1*nmom, sizeof(int));
  mom_perm    = (int*)calloc(nmom, sizeof(int));
  if( mom_members==NULL || mom_perm==NULL ) {
    fprintf(stderr, "could not allocate memory for mom_members / mom_perm\n");
    exit(4);
  }

  /* number of rows of perm_tab_3 to use for each table entry */
  mom_perm[0] = 1;
  if(Lhp1 > 1) {
    /* these entries exist only if nmom >= 4, i.e. LX >= 2 */
    mom_perm[1] = 3;
    mom_perm[2] = 3;
    mom_perm[3] = 1;
  }
  for (i=2; i<Lhp1; i++) {
    mom_perm[3*i-2] = 3;
    mom_perm[3*i-1] = 6;
    mom_perm[3*i  ] = 3;
  }
  for (i=0; i<nmom; i++)
    fprintf(stdout, "# %d\t(%d, %d, %d)\t%d\n", i, mom_tab[3*i], mom_tab[3*i+1], mom_tab[3*i+2], mom_perm[i]);

  /****************************************
   * allocate memory for the contractions
   ****************************************/
  disc1  = (double*)calloc(8*VOLUME, sizeof(double));
  disc2  = (double*)calloc(8*VOLUME, sizeof(double));
  if( disc1==NULL || disc2==NULL) { 
    fprintf(stderr, "could not allocate memory for disc\n");
    exit(3);
  }

  /* work holds 4 components x Thp1 x nmom complex numbers */
  work  = (double*)calloc(8*Thp1*nmom, sizeof(double));
  if( work == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for work\n");
    exit(3);
  }

  /***********************************************
   * start loop on gauge id.s 
   ***********************************************/
  for(gid=g_gaugeid; gid<=g_gaugeid2; gid++) {

    for(ix=0; ix<8*Thp1*nmom; ix++) work[ix] = 0.;
    for(ix=0; ix<8*VOLUME; ix++) disc2[ix] = 0.;
    for (i=0; i<Thp1*nmom; i++) mom_members[i] = 0;

    /***********************************************
     * start loop on source id.s 
     *   - add each source's field to disc2
     *   - subtract each source's product with itself from work
     ***********************************************/
    ratime = (double)clock() / CLOCKS_PER_SEC;
    for(sid1=g_sourceid; sid1<=g_sourceid2; sid1+=g_sourceid_step) {

      sprintf(filename, "jc_ud_x.%.4d.%.4d", gid, sid1);
      /* a failed read ends the source loop; the data read so far is kept */
      if(read_lime_contraction(disc1, filename, 4, 0) != 0) break;
      for(ix=0; ix<8*VOLUME; ix++) disc2[ix] += disc1[ix];
      
      count=0;
      for (it=0; it<Thp1; it++) {
        shift[0] = it; shift2[0] = it;
        for (i=0; i<nmom; i++) {
          shift[1] = mom_tab[3*i  ];
          shift[2] = mom_tab[3*i+1];
          shift[3] = mom_tab[3*i+2];
          for (mu=0; mu<mom_perm[i]; mu++) {
            shift2[1] = shift[perm_tab_3[mu][0]+1];
            shift2[2] = shift[perm_tab_3[mu][1]+1];
            shift2[3] = shift[perm_tab_3[mu][2]+1];
            /* start at the shift so that x-shift2 is a valid site */
            for(x0=shift2[0]; x0<T; x0++) {
            for(x1=shift2[1]; x1<LX; x1++) {
            for(x2=shift2[2]; x2<LY; x2++) {
            for(x3=shift2[3]; x3<LZ; x3++) {
              ix = g_ipt[x0][x1][x2][x3];
              iy = g_ipt[x0-shift2[0]][x1-shift2[1]][x2-shift2[2]][x3-shift2[3]];
              _co_eq_co_ti_co(&w, (complex*)(disc1+_GWI(0,ix,VOLUME)), (complex*)(disc1+_GWI(0,iy,VOLUME)));
              work[2*(            count)  ] -= w.re;
              work[2*(            count)+1] -= w.im;
              _co_eq_co_ti_co(&w, (complex*)(disc1+_GWI(1,ix,VOLUME)), (complex*)(disc1+_GWI(1,iy,VOLUME)));
              work[2*(  Thp1*nmom+count)  ] -= w.re;
              work[2*(  Thp1*nmom+count)+1] -= w.im;
              _co_eq_co_ti_co(&w, (complex*)(disc1+_GWI(2,ix,VOLUME)), (complex*)(disc1+_GWI(2,iy,VOLUME)));
              work[2*(2*Thp1*nmom+count)  ] -= w.re;
              work[2*(2*Thp1*nmom+count)+1] -= w.im;
              _co_eq_co_ti_co(&w, (complex*)(disc1+_GWI(3,ix,VOLUME)), (complex*)(disc1+_GWI(3,iy,VOLUME)));
              work[2*(3*Thp1*nmom+count)  ] -= w.re;
              work[2*(3*Thp1*nmom+count)+1] -= w.im;
            }}}}
          }
          count++;
        }
      }  /* of it=0,...,T/2 */
    }  /* of loop on sid1 */

    /***********************************************
     * add the product of the source-summed field
     * with itself; count the contributing site
     * pairs per (it, i) in mom_members
     ***********************************************/
    count=0;
    for (it=0; it<Thp1; it++) {
      shift[0] = it; shift2[0] = it;
      for (i=0; i<nmom; i++) {
        shift[1] = mom_tab[3*i  ];
        shift[2] = mom_tab[3*i+1];
        shift[3] = mom_tab[3*i+2];
        for (mu=0; mu<mom_perm[i]; mu++) {
          shift2[1] = shift[perm_tab_3[mu][0]+1];
          shift2[2] = shift[perm_tab_3[mu][1]+1];
          shift2[3] = shift[perm_tab_3[mu][2]+1];
          for(x0=shift2[0]; x0<T; x0++) {
          for(x1=shift2[1]; x1<LX; x1++) {
          for(x2=shift2[2]; x2<LY; x2++) {
          for(x3=shift2[3]; x3<LZ; x3++) {
            ix = g_ipt[x0][x1][x2][x3];
            iy = g_ipt[x0-shift2[0]][x1-shift2[1]][x2-shift2[2]][x3-shift2[3]];
            _co_eq_co_ti_co(&w, (complex*)(disc2+_GWI(0,ix,VOLUME)), (complex*)(disc2+_GWI(0,iy,VOLUME)));
            work[2*(            count)  ] += w.re;
            work[2*(            count)+1] += w.im;
            _co_eq_co_ti_co(&w, (complex*)(disc2+_GWI(1,ix,VOLUME)), (complex*)(disc2+_GWI(1,iy,VOLUME)));
            work[2*(  Thp1*nmom+count)  ] += w.re;
            work[2*(  Thp1*nmom+count)+1] += w.im;
            _co_eq_co_ti_co(&w, (complex*)(disc2+_GWI(2,ix,VOLUME)), (complex*)(disc2+_GWI(2,iy,VOLUME)));
            work[2*(2*Thp1*nmom+count)  ] += w.re;
            work[2*(2*Thp1*nmom+count)+1] += w.im;
            _co_eq_co_ti_co(&w, (complex*)(disc2+_GWI(3,ix,VOLUME)), (complex*)(disc2+_GWI(3,iy,VOLUME)));
            work[2*(3*Thp1*nmom+count)  ] += w.re;
            work[2*(3*Thp1*nmom+count)+1] += w.im;
            mom_members[count]++;
          }}}}
        }
        count++;
      }
    }  /* of it=0,...,T/2 */

    /* normalization */
    count=0;
    for (it=0; it<Thp1; it++) {
      for (i=0; i<nmom; i++) {
        fprintf(stdout, "%d\t%d\t%d\n", it, i, mom_members[count]);
        count++;
      }
    }

    /* NOTE(review): the factor below is N*(N-1) with N = g_sourceid2-g_sourceid+1;
     * it does not take g_sourceid_step into account and is zero for a single
     * source id (fnorm then becomes inf) - confirm that this is intended */
    for (mu=0; mu<4; mu++) {
      count=0;
      for (it=0; it<Thp1; it++) {
        for(i=0; i<nmom; i++) {
          fnorm = 1. / ( (double)mom_members[count]
            * (double)(g_sourceid2-g_sourceid+1) * (double)(g_sourceid2-g_sourceid) );
          work[2*(mu*Thp1*nmom+count)  ] *= fnorm;
          work[2*(mu*Thp1*nmom+count)+1] *= fnorm;
          count++;
        }
      }
    }

    retime = (double)clock() / CLOCKS_PER_SEC;
    if(g_cart_id == 0) fprintf(stdout, "# time for building correl.: %e seconds\n", retime-ratime);


    /************************************************
     * save results
     *   one line per (component, it, table entry):
     *   it, shift vector, its norm, Re, Im
     ************************************************/
    /* NOTE(review): "%4d" pads gid with blanks, whereas the input file names
     * use "%.4d" (zero padding) - confirm which output name is wanted */
    sprintf(filename, "jc_ud_tr.%4d", gid);
    ofs = fopen(filename, "w");
    if (ofs==NULL) {
      /* do not write to a NULL stream; skip this gauge id and report failure */
      fprintf(stderr, "Error, could not open file %s for writing\n", filename);
      status = 5;
      continue;
    }
    for(mu=0; mu<4; mu++) {
      count=0;
      for (it=0; it<Thp1; it++) {
        for (i=0; i<nmom; i++) {
          r2 = sqrt( mom_tab[3*i]*mom_tab[3*i] + mom_tab[3*i+1]*mom_tab[3*i+1]
                   + mom_tab[3*i+2]*mom_tab[3*i+2] );
          fprintf(ofs, "%3d%3d%3d%3d%16.7e%25.16e%25.16e\n", it, 
            mom_tab[3*i], mom_tab[3*i+1],mom_tab[3*i+2], r2, 
            work[2*(mu*Thp1*nmom+count)], work[2*(mu*Thp1*nmom+count)+1]);
          count++;
        }
      }
    }
    if (fclose(ofs) != 0) {
      /* fclose flushes buffered data; a failure means the file is incomplete */
      fprintf(stderr, "Error, could not close file %s\n", filename);
      status = 5;
    }

  }  /* of loop on gid */

  /***********************************************
   * free the allocated memory, finalize 
   ***********************************************/
  free_geometry();
  free(disc1);
  free(disc2);
  free(work);
  free(mom_tab);
  free(mom_perm);
  free(mom_members);

  return(status);

}
Example #27
0
/*
 * Contraction driver producing the "cvc_hpe5_X" / "cvc_hpe5_P" files.
 *
 * Steps, as performed below:
 *   1. parse options, read the input file, set up MPI parameters, the FFTW
 *      plans and the geometry;
 *   2. read the gauge field "<gaugefilename_prefix>.<Nconf>" and measure the
 *      plaquette (optionally after a gauge transformation, option -g);
 *   3. compute the gauge-field-only ("HPE") loop terms once and store them
 *      in disc2;
 *   4. for every source id sid in [g_sourceid, g_sourceid2] (step
 *      g_sourceid_step): read the propagator, apply Q_phi_tbc and BH5, and
 *      add the contraction for all four current components to disc;
 *   5. whenever the number of processed sources is a multiple of Nsave:
 *      combine disc (normalised) with disc2, write the position-space data,
 *      Fourier transform it, build the 4x4 products in momentum space and
 *      write those as well.
 *
 * Options:
 *   -f <file>   input file (default "cvc.input")
 *   -g          apply a gauge transformation to gauge field and propagators
 *   -v          verbose flag (parsed, currently not used any further)
 *   -h, -?      call usage()
 *
 * Returns 0; setup failures terminate the program via exit() / MPI_Abort().
 */
int main(int argc, char **argv) {
  
  int c, i, mu, nu;
  int count        = 0;   /* number of propagators processed so far */
  int filename_set = 0;
  int dims[4]      = {0,0,0,0};
  int l_LX_at, l_LXstart_at;
  int x0, x1, x2, x3, ix, iix;
  int sid;
  double *disc  = (double*)NULL;   /* propagator-dependent contraction, summed over sources */
  double *disc2 = (double*)NULL;   /* gauge-field-only terms, computed once */
  double *work = (double*)NULL;
  double q[4], fnorm;
  int verbose = 0;
  int do_gt   = 0;
  char filename[100], contype[200];
  double ratime, retime;
  double plaq, _2kappamu, hpe3_coeff, onepmutilde2, mutilde2;
  double spinor1[24], spinor2[24], U_[18], U1_[18], U2_[18];
  double *gauge_trafo=(double*)NULL;
  complex w, w1, w2, *cp1, *cp2, *cp3;

  fftw_complex *in=(fftw_complex*)NULL;

#ifdef MPI
  fftwnd_mpi_plan plan_p, plan_m;
#else
  fftwnd_plan plan_p, plan_m;
#endif

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "h?vgf:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'g':
      do_gt = 1;
      break;
    case 'f':
      /* filename is a fixed-size buffer; refuse arguments that do not fit */
      if(strlen(optarg) >= sizeof(filename)) {
        fprintf(stderr, "input filename too long (max. %d characters)\n", (int)sizeof(filename)-1);
        exit(1);
      }
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.\n");
    usage();
  }

  /* initialize MPI parameters */
  mpi_init(argc, argv);

  /* initialize fftw */
  dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ;
#ifdef MPI
  plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE);
  plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE);
  fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME);
#else
  plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE);
  plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD,  FFTW_MEASURE | FFTW_IN_PLACE);
  T            = T_global;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
#endif
  fprintf(stdout, "# [%2d] fftw parameters:\n"\
                  "# [%2d] T            = %3d\n"\
                  "# [%2d] Tstart       = %3d\n"\
                  "# [%2d] l_LX_at      = %3d\n"\
                  "# [%2d] l_LXstart_at = %3d\n"\
                  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
                  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
                  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

#ifdef MPI
  if(T==0) {
    fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id);
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
    exit(2);
  }
#endif

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(1);
  }

  geometry();

  /* read the gauge field */
  alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND);
  /* bounded formatting: the prefix comes from the input file */
  snprintf(filename, sizeof(filename), "%s.%.4d", gaugefilename_prefix, Nconf);
  if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename);
  read_lime_gauge_field_doubleprec(filename);
#ifdef MPI
  xchange_gauge();
#endif

  /* measure the plaquette */
  plaquette(&plaq);
  if(g_cart_id==0) fprintf(stdout, "measured plaquette value: %25.16e\n", plaq);

  if(do_gt==1) {
    /***********************************
     * initialize gauge transformation
     ***********************************/
    init_gauge_trafo(&gauge_trafo, 1.);
    apply_gt_gauge(gauge_trafo);
    plaquette(&plaq);
    if(g_cart_id==0) fprintf(stdout, "measured plaquette value after gauge trafo: %25.16e\n", plaq);
  }

  /****************************************
   * allocate memory for the spinor fields
   *   [2]: propagator read from file
   *   [0]: Q_phi_tbc applied to [2]
   *   [1]: BH5 applied to [2]
   ****************************************/
  no_fields = 3;
  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  if( g_spinor_field == (double**)NULL ) {
    fprintf(stderr, "could not allocate memory for g_spinor_field\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }
  for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND);

  /****************************************
   * allocate memory for the contractions
   ****************************************/
  disc  = (double*)calloc( 8*VOLUME, sizeof(double));
  if( disc == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for disc\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }
  for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.;

  disc2 = (double*)calloc( 8*VOLUME, sizeof(double));
  if( disc2 == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for disc2\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }
  for(ix=0; ix<8*VOLUME; ix++) disc2[ix] = 0.;

  /* work: 4 position-space + 4 transformed + 16 product fields, 2*VOLUME doubles each */
  work  = (double*)calloc(48*VOLUME, sizeof(double));
  if( work == (double*)NULL ) { 
    fprintf(stderr, "could not allocate memory for work\n");
#  ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#  endif
    exit(3);
  }

  /****************************************
   * prepare Fourier transformation arrays
   ****************************************/
  in  = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex));
  if(in==(fftw_complex*)NULL) {    
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
#endif
    exit(4);
  }

  /************************************************
   * HPE: calculate coeff. of 3rd order term
   ************************************************/
  _2kappamu    = 2. * g_kappa * g_mu;
  onepmutilde2 = 1. + _2kappamu * _2kappamu;
  mutilde2     = _2kappamu * _2kappamu;

  hpe3_coeff   = 16. * g_kappa*g_kappa*g_kappa*g_kappa * (1. + 6. * mutilde2 + mutilde2*mutilde2) / onepmutilde2 / onepmutilde2 / onepmutilde2 / onepmutilde2;

  fprintf(stdout, "hpe3_coeff = %25.16e\n", hpe3_coeff);

  /************************************************
   * HPE: calculate the plaquette terms 
   *   for each site ix and direction mu, loop over
   *   the three directions nu != mu and add
   *   hpe3_coeff * Im( tr(P(ix)) - tr(P(ix-nu)) ),
   *   where P(y) = U_mu(y) U_nu(y+mu) [U_nu(y) U_mu(y+nu)]^+,
   *   to the imaginary part of disc2
   ************************************************/

  for(ix=0; ix<VOLUME; ix++) {
    for(mu=0; mu<4; mu++) { 
      for(i=1; i<4; i++) {
        nu = (mu+i)%4;
        _cm_eq_cm_ti_cm(U1_, g_gauge_field+_GGI(ix,mu), g_gauge_field+_GGI(g_iup[ix][mu],nu) );
        _cm_eq_cm_ti_cm(U2_, g_gauge_field+_GGI(ix,nu), g_gauge_field+_GGI(g_iup[ix][nu],mu) );
        _cm_eq_cm_ti_cm_dag(U_, U1_, U2_);
        _co_eq_tr_cm(&w1, U_);

        /* the same loop, based at the neighbour of ix in negative nu-direction */
        iix = g_idn[ix][nu];
        _cm_eq_cm_ti_cm(U1_, g_gauge_field+_GGI(iix,mu), g_gauge_field+_GGI(g_iup[iix][mu],nu) );
        _cm_eq_cm_ti_cm(U2_, g_gauge_field+_GGI(iix,nu), g_gauge_field+_GGI(g_iup[iix][nu],mu) );
        _cm_eq_cm_ti_cm_dag(U_, U1_, U2_);
        _co_eq_tr_cm(&w2, U_);
        disc2[_GWI(mu,ix,VOLUME)+1] += hpe3_coeff * (w1.im - w2.im);
      }  /* of nu */

      /****************************************
       * - in case lattice size equals 4 
       *   calculate additional loop term
       * - _NOTE_ the possible minus sign from
       *   the fermionic boundary conditions
       ****************************************/
      if(dims[mu]==4) {
        wilson_loop(&w, ix, mu, dims[mu]);
        fnorm = -64. * g_kappa*g_kappa*g_kappa*g_kappa / onepmutilde2 / onepmutilde2 / onepmutilde2 / onepmutilde2; 
        disc2[_GWI(mu,ix,VOLUME)+1] += fnorm * w.im;
      }
    }
  }

  /***********************************************
   * start loop on source id.s 
   ***********************************************/
  for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) {

    /* read the new propagator */
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    /* a failed read ends the source loop */
    if(format==0) {
      snprintf(filename, sizeof(filename), "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid);
      if(read_lime_spinor(g_spinor_field[2], filename, 0) != 0) break;
    }
    else if(format==1) {
      snprintf(filename, sizeof(filename), "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid);
      if(read_cmi(g_spinor_field[2], filename) != 0) break;
    }
    xchange_field(g_spinor_field[2]);
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    fprintf(stdout, "time to read prop.: %e seconds\n", retime-ratime);

    if(do_gt==1) {
      /******************************************
       * gauge transform the propagators for sid
       ******************************************/
      for(ix=0; ix<VOLUME; ix++) {
        _fv_eq_cm_ti_fv(spinor1, gauge_trafo+18*ix, g_spinor_field[2]+_GSI(ix));
        _fv_eq_fv(g_spinor_field[2]+_GSI(ix), spinor1);
      }
      xchange_field(g_spinor_field[2]);
    }

    count++;

    /************************************************
     * calculate the source: apply Q_phi_tbc 
     ************************************************/
#ifdef MPI
    ratime = MPI_Wtime();
#else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#endif
    Q_phi_tbc(g_spinor_field[0], g_spinor_field[2]);
    xchange_field(g_spinor_field[0]); 
#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "time to calculate source: %e seconds\n", retime-ratime);


    /************************************************
     * HPE: apply BH5 
     ************************************************/
    BH5(g_spinor_field[1], g_spinor_field[2]);

    /* add new contractions to (existing) disc */
#  ifdef MPI
    ratime = MPI_Wtime();
#  else
    ratime = (double)clock() / CLOCKS_PER_SEC;
#  endif
    for(mu=0; mu<4; mu++) { /* loop on Lorentz index of the current */
      iix = _GWI(mu,0,VOLUME);
      for(ix=0; ix<VOLUME; ix++) {    /* loop on lattice sites */
        /* link variable times the phase factor for direction mu */
        _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]);

        /* first contribution */
        _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]);
        _fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
        _fv_mi_eq_fv(spinor2, spinor1);
        _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2);
        disc[iix  ] -= 0.5 * w.re;
        disc[iix+1] -= 0.5 * w.im;

        /* second contribution */
        _fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]);
        _fv_eq_gamma_ti_fv(spinor2, mu, spinor1);
        _fv_pl_eq_fv(spinor2, spinor1);
        _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2);
        disc[iix  ] -= 0.5 * w.re;
        disc[iix+1] -= 0.5 * w.im;

        iix += 2;
      }  /* of ix */
    }    /* of mu */

#ifdef MPI
    retime = MPI_Wtime();
#else
    retime = (double)clock() / CLOCKS_PER_SEC;
#endif
    if(g_cart_id==0) fprintf(stdout, "# time to contract cvc: %e seconds\n", retime-ratime);


    /************************************************
     * save results for count = multiple of Nsave 
     *   (Nsave == 0 disables saving instead of
     *    dividing by zero)
     ************************************************/
    if(Nsave != 0 && count%Nsave == 0) {

      if(g_cart_id == 0) fprintf(stdout, "save results for count = %d\n", count);

      /* source average of disc plus the source-independent terms in disc2 */
      fnorm = 1. / ( (double)count * g_prop_normsqr );
      if(g_cart_id==0) fprintf(stdout, "# X-fnorm = %e\n", fnorm);
      for(mu=0; mu<4; mu++) {
        for(ix=0; ix<VOLUME; ix++) {
          work[_GWI(mu,ix,VOLUME)  ] = disc[_GWI(mu,ix,VOLUME)  ] * fnorm + disc2[_GWI(mu,ix,VOLUME)  ];
          work[_GWI(mu,ix,VOLUME)+1] = disc[_GWI(mu,ix,VOLUME)+1] * fnorm + disc2[_GWI(mu,ix,VOLUME)+1];
        }
      }

      /* save the result in position space */
      sprintf(filename, "cvc_hpe5_X.%.4d.%.4d", Nconf, count);
      sprintf(contype, "cvc-disc-all-hpe-05-X");
      write_lime_contraction(work, filename, 64, 4, contype, Nconf, count);

#ifdef MPI
      ratime = MPI_Wtime();
#else
      ratime = (double)clock() / CLOCKS_PER_SEC;
#endif

      /* Fourier transform data, copy to work:
       *   fields 4..7 receive the FFTW_FORWARD  transform (plan_m),
       *   fields 0..3 are replaced by the FFTW_BACKWARD transform (plan_p) */
      for(mu=0; mu<4; mu++) {
        memcpy((void*)in, (void*)(work+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
        fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
        fftwnd_one(plan_m, in, NULL);
#endif
        memcpy((void*)(work+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));


        memcpy((void*)in, (void*)(work+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double));
#ifdef MPI
        fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER);
#else
        fftwnd_one(plan_p, in, NULL);
#endif
        memcpy((void*)(work+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double));
      }  /* of mu =0 ,..., 3*/

      /* products field(mu) * field(4+nu) * exp( i pi (q_mu - q_nu) ) / volume,
       * stored in fields 8 + 4*mu + nu */
      fnorm = 1. / (double)(T_global*LX*LY*LZ);
      if(g_cart_id==0) fprintf(stdout, "# P-fnorm = %e\n", fnorm);
      for(mu=0; mu<4; mu++) {
      for(nu=0; nu<4; nu++) {
        cp1 = (complex*)(work+_GWI(mu,0,VOLUME));
        cp2 = (complex*)(work+_GWI(4+nu,0,VOLUME));
        cp3 = (complex*)(work+_GWI(8+4*mu+nu,0,VOLUME));
     
        for(x0=0; x0<T; x0++) {
          q[0] = (double)(x0+Tstart) / (double)T_global;
        for(x1=0; x1<LX; x1++) {
          q[1] = (double)(x1) / (double)LX;
        for(x2=0; x2<LY; x2++) {
          q[2] = (double)(x2) / (double)LY;
        for(x3=0; x3<LZ; x3++) {
          q[3] = (double)(x3) / (double)LZ;
          ix = g_ipt[x0][x1][x2][x3];
          w.re = cos( M_PI * (q[mu]-q[nu]) );
          w.im = sin( M_PI * (q[mu]-q[nu]) );
          _co_eq_co_ti_co(&w1, cp1, cp2);
          _co_eq_co_ti_co(cp3, &w1, &w);
          _co_ti_eq_re(cp3, fnorm);
          cp1++; cp2++; cp3++;
        }
        }
        }
        }

      }
      }
  
      /* save the result in momentum space */
      sprintf(filename, "cvc_hpe5_P.%.4d.%.4d", Nconf, count);
      sprintf(contype, "cvc-disc-all-hpe-05-P");
      write_lime_contraction(work+_GWI(8,0,VOLUME), filename, 64, 16, contype, Nconf, count);
#ifdef MPI
      retime = MPI_Wtime();
#else
      retime = (double)clock() / CLOCKS_PER_SEC;
#endif
      if(g_cart_id==0) fprintf(stdout, "# time to save cvc results: %e seconds\n", retime-ratime);

    }  /* of count % Nsave == 0 */

  }  /* of loop on sid */

  /***********************************************
   * free the allocated memory, finalize 
   ***********************************************/
  free(g_gauge_field);
  for(i=0; i<no_fields; i++) free(g_spinor_field[i]);
  free(g_spinor_field);
  free_geometry();
  free(in);      /* in was obtained from malloc(), so release it with free() */
  free(disc);
  free(disc2);
  /* NOTE(review): gauge_trafo (set up by init_gauge_trafo for -g) is not
   * released here; how it is allocated is not visible in this file */

  free(work);

#ifdef MPI
  fftwnd_mpi_destroy_plan(plan_p);
  fftwnd_mpi_destroy_plan(plan_m);
  MPI_Finalize();
#else
  fftwnd_destroy_plan(plan_p);
  fftwnd_destroy_plan(plan_m);
#endif

  return(0);

}
Example #28
0
/**************************************************************************************************
 * main
 *
 * Timeslice-wise local-local two-point contractions with full spatial (x) dependence.
 *
 * For every global timeslice the program
 *   - reads one timeslice of each of the n_s spin components (per colour) of the propagator
 *     from "<filename_prefix>.<Nconf>.00.<n_c*spin+colour>.inverted"; if nfc > 1 a second set
 *     is read either from the same file at position 1-position (option -s) or from the file
 *     built with filename_prefix2,
 *   - accumulates, via contract_twopoint_xdep_timeslice, 16 (pseudo-)scalar and 16
 *     (pseudo-)vector gamma combinations (K = 32 in total) for up to nfc sign combinations
 *     (sigmalight, sigmaheavy),
 *   - writes the result to "correl.<Nconf>.t..x..y..z.." (created for timeslice 0, appended
 *     afterwards) and, with option -a, additionally to a matching ".ascii" file.
 *
 * Command line options:
 *   -f <file>  input file name (default "cvc.input")
 *   -c <n>     number of colours n_c (default 1)
 *   -p <n>     propagator position inside the file (default: g_propagator_position)
 *   -n <n>     number of flavor combinations nfc (default 2)
 *   -P <n>     precision in bits; 32 is switched to 64 with a warning
 *   -a         also write the correlators in ascii format
 *   -s         read both propagator sets from the same file
 *   -v         set the verbose flag (not evaluated inside this function)
 *   -h, -?     call usage()
 *
 * Returns 0 on success; all error paths terminate the program through exit().
 **************************************************************************************************/
int main(int argc, char **argv) {
  
  int K=32, nfc=2, timeslice, status;
  int c, i, j, icol;
  int count, position=-1, position_set=0;
  size_t shift;
  long unsigned int VOL3;
  int filename_set = 0;
  int x1, x2, x3, idx;
  int n_c=1, n_s=4;
  int *xgindex1=NULL, *xgindex2=NULL, *xisimag=NULL;
  int write_ascii=0;
  int prop_single_file=0;
  double *xvsign=NULL;
  void *cconn=NULL;
  void *buffer = NULL;
  int sigmalight=0, sigmaheavy=0;
  double correlator_norm = 1.;
  void * vptr;
  int source_coords[4];

  int verbose = 0;
  size_t prec = 64, bytes;
  char filename[200];
  double ratime, retime;
  void  *chi=NULL, *psi=NULL;
  FILE *ofs=NULL, *ofs2=NULL;
  double c_conf_gamma_sign[]  = {1., 1., 1., -1., -1., -1., -1., 1., 1., 1., -1., -1.,  1.,  1., 1., 1.};
  double n_conf_gamma_sign[]  = {1., 1., 1., -1., -1., -1., -1., 1., 1., 1.,  1.,  1., -1., -1., 1., 1.};
  double *conf_gamma_sign=NULL;

  void *spinor_field=NULL;
  DML_Checksum *checksum=NULL;

  /**************************************************************************************************
   * charged stuff
   *
   * (pseudo-)scalar:
   * g5 - g5,   g5   - g0g5,    g0g5 - g5,      g0g5 - g0g5,
   * g0 - g0,   g5   - g0,      g0   - g5,      g0g5 - g0,
   * g0 - g0g5, 1    - 1,       1    - g5,      g5   - 1,
   * 1  - g0g5, g0g5 - 1,       1    - g0,      g0   - 1
   *
   * (pseudo-)vector:
   * gig0 - gig0,       gi     - gi,            gig5 - gig5,    gig0   - gi,
   * gi   - gig0,       gig0   - gig5,          gig5 - gig0,    gi     - gig5,
   * gig5 - gi,         gig0g5 - gig0g5,        gig0 - gig0g5,  gig0g5 - gig0,
   * gi   - gig0g5,     gig0g5 - gi,            gig5 - gig0g5,  gig0g5 - gig5
   *
   * Layout of the index tables: entries 0..15 are the scalar sector, entries 16..63 are the
   * vector sector in groups of three (one entry per spatial direction).
   **************************************************************************************************/
  int gindex1[] = {5, 5, 6, 6, 0, 5, 0, 6, 0, 4, 4, 5, 4, 6, 4, 0,
                   10, 11, 12, 1, 2, 3, 7, 8, 9, 10, 11, 12, 1, 2, 3, 10, 11, 12, 7, 8, 9, 1, 2, 3, 7, 8, 9,
                   13, 14, 15, 10, 11, 12, 15, 14, 13, 1, 2, 3, 15, 14, 13, 7, 8, 9, 15, 14, 13};

  int gindex2[] = {5, 6, 5, 6, 0, 0, 5, 0, 6, 4, 5, 4, 6, 4, 0, 4,
                   10, 11, 12, 1, 2, 3, 7, 8, 9, 1, 2, 3, 10, 11, 12, 7, 8, 9, 10, 11, 12, 7, 8, 9, 1, 2, 3,
                   13, 14, 15, 15, 14, 13, 10, 11, 12, 15, 14, 13, 1, 2, 3, 15, 14, 13, 7, 8, 9};

  /* due to twisting we have several correlators that are purely imaginary */
  int isimag[]  = {0, 0, 0, 0, 
                   0, 1, 1, 1, 
                   1, 0, 1, 1, 
                   1, 1, 0, 0,

                   0, 0, 0, 0, 
                   0, 1, 1, 1, 
                   1, 0, 1, 1, 
                   1, 1, 0, 0};

  double vsign[]  = {1.,  1., 1.,   1.,  1., 1.,   1.,  1., 1.,   1.,  1., 1.,
                     1.,  1., 1.,   1.,  1., 1.,   1.,  1., 1.,   1.,  1., 1.,
                     1.,  1., 1.,   1.,  1., 1.,   1., -1., 1.,   1., -1., 1., 
                     1., -1., 1.,   1., -1., 1.,   1., -1., 1.,   1., -1., 1.};


  /**************************************************************************************************
   * neutral stuff 
   *
   * (pseudo-)scalar:
   * g5 - g5,   g5   - g0g5,    g0g5 - g5,      g0g5 - g0g5,
   * 1  - 1,    g5   - 1,       1    - g5,      g0g5 - 1,
   * 1  - g0g5, g0   - g0,      g0   - g5,      g5   - g0,
   * g0 - g0g5, g0g5 - g0,      g0   - 1,       1    - g0
   *
   * (pseudo-)vector:
   * gig0   - gig0,     gi   - gi,      gig0g5 - gig0g5,        gig0   - gi, 
   * gi     - gig0,     gig0 - gig0g5,  gig0g5 - gig0,          gi     - gig0g5,
   * gig0g5 - gi,       gig5 - gig5,    gig5   - gi,            gi     - gig5,
   * gig5   - gig0,     gig0 - gig5,    gig5   - gig0g5,        gig0g5 - gig5
   **************************************************************************************************/
  int ngindex1[] = {5, 5, 6, 6, 4, 5, 4, 6, 4, 0, 0, 5, 0, 6, 0, 4,
                    10, 11, 12, 1, 2, 3, 13, 14, 15, 10, 11, 12,  1,  2,  3, 10, 11, 12, 15, 14, 13, 1, 2, 3, 15, 14, 13,
                     7,  8,  9, 7, 8, 9,  1,  2,  3,  7,  8,  9, 10, 11, 12,  7,  8,  9, 15, 14, 13};

  int ngindex2[] = {5, 6, 5, 6, 4, 4, 5, 4, 6, 0, 5, 0, 6, 0, 4, 0,
                    10, 11, 12, 1, 2, 3, 13, 14, 15,  1,  2,  3, 10, 11, 12, 15, 14, 13, 10, 11, 12, 15, 14, 13, 1, 2, 3,
                     7,  8,  9, 1, 2, 3,  7,  8,  9, 10, 11, 12,  7,  8,  9, 15, 14, 13,  7,  8, 9};

  int nisimag[]  = {0, 0, 0, 0,
                    0, 1, 1, 1,
                    1, 0, 1, 1,
                    1, 1, 0, 0,

                    0, 0, 0, 0,
                    0, 1, 1, 1, 
                    1, 0, 1, 1,
                    1, 1, 0, 0};

  double nvsign[] = {1.,  1., 1.,   1.,  1., 1.,   1.,  1., 1.,   1.,  1., 1., 
                     1.,  1., 1.,   1., -1., 1.,   1., -1., 1.,   1., -1., 1.,
                     1., -1., 1.,   1.,  1., 1.,   1.,  1., 1.,   1.,  1., 1.,
                     1.,  1., 1.,   1.,  1., 1.,   1., -1., 1.,   1., -1., 1. };
 

/*
  double isneg_std[]=    {+1., -1., +1., -1., +1., +1., +1., +1., -1., +1., +1., +1., +1., +1., +1., +1.,    
                          -1., +1., -1., -1., +1., +1., +1., -1., +1., -1., +1., +1., +1., +1., +1., +1.}; 
*/
  double isneg_std[]=    {+1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1.,    
                          +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1.};

  double isneg[32];


  while ((c = getopt(argc, argv, "sah?vf:c:p:n:P:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'f':
      /* bounded copy: optarg is user input and filename is a fixed-size buffer */
      if(snprintf(filename, sizeof(filename), "%s", optarg) >= (int)sizeof(filename)) {
        fprintf(stderr, "[] Error, input filename too long\n");
        exit(76);
      }
      filename_set=1;
      break;
    case 'c':
      n_c = atoi(optarg);
      break;
    case 'p':
      position = atoi(optarg);
      position_set = 1;
      break;
    case 'a':
      write_ascii = 1;
      fprintf(stdout, "# [] will write in ascii format\n"); 
      break;
    case 'n':
      nfc = atoi(optarg);
      fprintf(stdout, "# [] number of flavor combinations set to %d\n", nfc); 
      break;
    case 's':
      prop_single_file = 1;
      fprintf(stdout, "# [] will read up and down from same file\n"); 
      break;
    case 'P':
      prec = (size_t)atoi(optarg);
      /* prec is a size_t, so %zu is the matching conversion */
      fprintf(stdout, "# [] set precision to %zu\n", prec); 
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* the global time stamp */
  g_the_time = time(NULL);
  fprintf(stdout, "\n# [ll_conn_x2dep_extract] using global time stamp %s", ctime(&g_the_time));

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }

  if( Nlong > 0 ) {
    if(g_proc_id==0) fprintf(stdout, "Fuzzing not available in this version.\n");
    usage();
  }

  /* initialize MPI parameters */
  mpi_init(argc, argv);

  /* number of sites in one timeslice */
  VOL3 = LX*LY*LZ;
  fprintf(stdout, "# [%2d] parameters:\n"
                  "# [%2d] T_global     = %3d\n"
                  "# [%2d] T            = %3d\n"
                  "# [%2d] Tstart       = %3d\n"
                  "# [%2d] LX_global    = %3d\n"
                  "# [%2d] LX           = %3d\n"
                  "# [%2d] LXstart      = %3d\n"
                  "# [%2d] LY_global    = %3d\n"
                  "# [%2d] LY           = %3d\n"
                  "# [%2d] LYstart      = %3d\n",
                  g_cart_id, g_cart_id, T_global,  g_cart_id, T,  g_cart_id, Tstart, 
                             g_cart_id, LX_global, g_cart_id, LX, g_cart_id, LXstart,
                             g_cart_id, LY_global, g_cart_id, LY, g_cart_id, LYstart);

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 2);
    MPI_Finalize();
#endif
    exit(1);
  }

  // switch to double precision if single precision was set
  if(prec == 32) {
    fprintf(stderr, "[] Warning: switching to double precision\n");
    prec = 64;
  }

  geometry();

  fprintf(stdout, "# ll_conn wout MPI\n");
  fprintf(stdout, "# number of colours = %d\n", n_c);

  if(position_set == 0) {
    position = g_propagator_position;
    if(g_cart_id == 0) fprintf(stdout, "# using input file value for prop pos %d\n", position);
  } else {
    if(g_cart_id == 0) fprintf(stdout, "# using command line arg value for prop pos %d\n", position);
  }

  /*********************************************
   * set the isneg field
   *********************************************/
  for(i = 0; i < K; i++) isneg[i] = isneg_std[i];

  /*********************************************************
   * allocate memory for the spinor fields:
   * n_s fields, doubled if more than one flavor
   * combination is requested
   *********************************************************/
  no_fields = n_s;
  if(nfc>1) {
    no_fields *= 2;
  }
  if(prec==64) {
    spinor_field = calloc(no_fields, sizeof(double*));
  } else {
    spinor_field = calloc(no_fields, sizeof(float*));
  }
  /* the pointer array is dereferenced right below, so it must be valid */
  if(spinor_field == NULL) {
    fprintf(stderr, "Error, could not alloc spinor field pointers\n");
    exit(11);
  }

  if(g_cart_id==0) fprintf(stdout, "# no. of spinor fields is %d\n", no_fields);

  /* each field holds one timeslice: 24 real numbers per site */
  if(prec==64) {
    for(i=0; i<no_fields; i++) {
      ((double**)spinor_field)[i] = (double*)malloc(24*VOL3*sizeof(double));
      if( ((double**)spinor_field)[i] == NULL) {
        fprintf(stderr, "Error, could not alloc spinor field %d\n", i);
        exit(12);
      }
    }
  } else {
    for(i=0; i<no_fields; i++) {
      ((float**)spinor_field)[i] = (float*)malloc(24*VOL3*sizeof(float));
      if(((float**)spinor_field)[i] == NULL) {
        fprintf(stderr, "Error, could not alloc spinor field %d\n", i);
        exit(14);
      }
    }
  }

  /* one checksum per (spin, colour) component and per propagator set */
  checksum = (DML_Checksum*)malloc(2*n_c*n_s*sizeof(DML_Checksum));
  if(checksum == NULL) {
    fprintf(stderr, "[] Error, could not alloc checksum\n");
    exit(75);
  }

  /*********************************************************
   * allocate memory for the contractions:
   * per site nfc*K complex numbers (2 reals each)
   *********************************************************/
  bytes = (prec==64) ? sizeof(double) : sizeof(float);
  cconn = calloc(2*nfc*K*VOL3, bytes);
  if( cconn==NULL ) {
    fprintf(stderr, "could not allocate memory for cconn\n");
    exit(3);
  }

  /* NOTE(review): buffer is allocated and freed but not otherwise used in this function */
  buffer  = calloc(2*nfc*K*LZ, bytes);
  if( buffer==NULL) {
    fprintf(stderr, "could not allocate memory for buffers\n");
    exit(4);
  }

  /******************************************************************
   * calculate source coordinates
   ******************************************************************/
  source_coords[0] = g_source_location / (LX_global*LY_global*LZ);
  source_coords[1] = ( g_source_location % (LX_global*LY_global*LZ) ) / (LY_global*LZ);
  source_coords[2] = ( g_source_location % (LY_global*LZ) ) / LZ;
  source_coords[3] = g_source_location % LZ;
  if(g_cart_id==0) fprintf(stdout, "# source coords = %3d%3d%3d%3d\n", source_coords[0], source_coords[1],
    source_coords[2], source_coords[3]);

  /******************************************************************
   * final normalization of the correlators
   ******************************************************************/
/*  correlator_norm = 1. / ( 2. * g_kappa * g_kappa * (double)(LX_global*LY_global*LZ) );*/
  correlator_norm = 1.;
  if(g_cart_id==0) fprintf(stdout, "# correlator_norm = %12.5e\n", correlator_norm);

  /******************************************************************
   ******************************************************************
   **                                                              **
   **  local - local                                               **
   **                                                              **
   ******************************************************************
   ******************************************************************/
  if(g_cart_id==0) fprintf(stdout, "# Starting LL\n");

  for(timeslice=0;timeslice<T_global;timeslice++) {
    /* the contraction routine accumulates into cconn, so reset it for every timeslice */
    if(prec==64) {
      for(idx=0; idx<2*nfc*K*VOL3; idx++) ((double*)cconn)[idx] = 0.;
    } else {
      for(idx=0; idx<2*nfc*K*VOL3; idx++) ((float*)cconn)[idx] = 0.;
    }

    for(icol=0;icol<n_c;icol++) {
      ratime = (double)clock() / CLOCKS_PER_SEC;
      /* read the current timeslice of all n_s spin components for colour icol */
      for(i=0; i<n_s; i++) {
        if(prec==64) {
          snprintf(filename, sizeof(filename), "%s.%.4d.00.%.2d.inverted", filename_prefix, Nconf, n_c*i+icol);
          status = read_lime_spinor_timeslice( (double*)(((double**)spinor_field)[i]), timeslice, filename, position, checksum+n_c*i+icol);
        } else {
          fprintf(stderr, "[] Error, no single precision timeslice-wise reading yet\n");
          exit(72);
        }
        if (status != 0) {
          fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename);
          exit(73);
        }
        if(nfc>1) {
          /* second propagator set goes into fields n_s .. 2*n_s-1 */
          if(prop_single_file) { 
            if(prec==64) {
              status = read_lime_spinor_timeslice( (double*)(((double**)spinor_field)[i+n_s]), timeslice, filename, 1-position, checksum+n_c*i+icol+n_s*n_c);
            } else {
              fprintf(stderr, "[] Error, no single precision timeslice-wise reading yet\n");
              exit(72);
            }
          } else {
            if(prec==64) {
              snprintf(filename, sizeof(filename), "%s.%.4d.00.%.2d.inverted", filename_prefix2, Nconf, n_c*i+icol);
              status = read_lime_spinor_timeslice((double*)(((double**)spinor_field)[i+n_s]), timeslice, filename, position, checksum+n_c*i+icol+n_s*n_c);
            } else {
              fprintf(stderr, "[] Error, no single precision timeslice-wise reading yet\n");
              exit(72);
            }
          }
          if (status != 0) {
            fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename);
            exit(74);
          }
        }  // of if nfc > 1
      }    // of is

      retime = (double)clock() / CLOCKS_PER_SEC;
      if(g_cart_id==0) fprintf(stdout, "# time for preparing light prop.: %e seconds\n", retime-ratime);
 
 
      ratime = (double)clock() / CLOCKS_PER_SEC;
      /* count enumerates the (sigmalight, sigmaheavy) combinations; only the first nfc are used */
      count = -1;
      for(sigmalight=1; sigmalight>=-1; sigmalight-=2) { 
      for(sigmaheavy=1; sigmaheavy>=-1; sigmaheavy-=2) {
        count++;
        if(count>=nfc) continue;

        /* sigma = +1 selects fields 0..n_s-1, sigma = -1 selects fields n_s..2*n_s-1 */
        if(prec==64) {
          chi = (void*) &( ((double**)spinor_field)[ ( (1-sigmalight)/2 )*n_s ] );
          psi = (void*) &( ((double**)spinor_field)[ ( (1-sigmaheavy)/2 )*n_s ] );
        } else {
          chi = (void*) &( ((float**)spinor_field)[ ( (1-sigmalight)/2 )*n_s ] );
          psi = (void*) &( ((float**)spinor_field)[ ( (1-sigmaheavy)/2 )*n_s ] );
        }
        /* equal signs use the "charged" tables, different signs the "neutral" ones */
        if(sigmalight == sigmaheavy) {
          xgindex1 = gindex1;  xgindex2 = gindex2;  xisimag=isimag;  xvsign=vsign;  conf_gamma_sign = c_conf_gamma_sign;
        } else {
          xgindex1 = ngindex1; xgindex2 = ngindex2; xisimag=nisimag; xvsign=nvsign; conf_gamma_sign = n_conf_gamma_sign;
        }
 
        // (pseudo-)scalar sector
        for(idx=0; idx<16; idx++) {
          //fprintf(stdout, "# sigma(%d, %d): (idx,i) = (%d,%d) ---> (%d,%d)\n", 
          //    sigmalight, sigmaheavy, idx, i, xgindex1[idx], xgindex2[idx]);
          if(prec==64) {
            vptr = (void*)( ((double*)cconn) + 2*(count*K + idx) );
          } else {
            vptr = (void*)( ((float*)cconn) + 2*(count*K + idx) );
          }
          contract_twopoint_xdep_timeslice(vptr, xgindex1[idx], xgindex2[idx], chi, psi, 1, nfc*K, 1.0, prec);
        }
        // (pseudo-)vector sector: the three spatial components of a group are
        // accumulated into the same output slot 16+(idx-16)/3
        for(idx = 16; idx < 64; idx+=3) {

          for(i = 0; i < 3; i++) {
            //if(xgindex1[idx+i]==xgindex2[idx+i] && (xgindex2[idx+i]==1 || xgindex2[idx+i]==2 || xgindex2[idx+i]==3) ) {
            //  fprintf(stdout, "# sigma(%d, %d): (idx,i) = (%d,%d) ---> (%d,%d); factor = %e\n", 
            //      sigmalight, sigmaheavy, idx, i, xgindex1[idx+i], xgindex2[idx+i], conf_gamma_sign[(idx-16)/3]*xvsign[idx-16+i]);
            //}
            if(prec==64) {
              vptr = (void*)( ((double*)cconn) + 2*(count*K + (16+(idx-16)/3)) );
            } else {
              vptr = (void*)( ((float*)cconn) + 2*(count*K + (16+(idx-16)/3)) );
            }
            contract_twopoint_xdep_timeslice(vptr, xgindex1[idx+i], xgindex2[idx+i], chi, psi, 1, nfc*K,
              conf_gamma_sign[(idx-16)/3]*xvsign[idx-16+i], prec);
          }
        }
      }}

    }    // of loop on colors

    /***************************************************************
     * write contractions to file
     ***************************************************************/

    ratime = (double)clock() / CLOCKS_PER_SEC;
    snprintf(filename, sizeof(filename), "correl.%.4d.t%.2dx%.2dy%.2dz%.2d", Nconf, source_coords[0], 
      source_coords[1], source_coords[2], source_coords[3]);

    /* first timeslice creates the file, later timeslices append to it */
    if(timeslice == 0) {
      // fprintf(stdout, "# [] opening file %s for writing\n", filename);
      ofs = fopen(filename, "w");
    } else {
      // fprintf(stdout, "# [] opening file %s for appending\n", filename);
      ofs = fopen(filename, "a");
    }
    if(ofs==NULL) {
      fprintf(stderr, "Error, could not open file %s for writing\n", filename);
      exit(7);
    }

    if(write_ascii) {
      snprintf(filename, sizeof(filename), "correl.%.4d.t%.2dx%.2dy%.2dz%.2d.ascii", Nconf, source_coords[0], 
        source_coords[1], source_coords[2], source_coords[3]);
      if(timeslice == 0) {
        ofs2 = fopen(filename, "w");
      } else {
        ofs2 = fopen(filename, "a");
      }
      /* ofs2 is passed to fprintf below, so a failed fopen must not go unnoticed */
      if(ofs2==NULL) {
        fprintf(stderr, "Error, could not open file %s for writing\n", filename);
        exit(7);
      }
    }
    for(x1=0; x1<LX_global; x1++) {
    for(x2=0; x2<LY_global; x2++) {
      /* site offset of the z-line (x1, x2, 0..LZ-1) inside the timeslice */
      shift = ( (x1 % LX) * LY + (x2 % LY) ) * LZ;

      if(prec==64) {
        vptr = (void*)( ((double*)cconn)+shift*2*nfc*K);
        bytes = sizeof(double);
      } else {
        vptr = (void*)( ((float*)cconn)+shift*2*nfc*K);
        bytes = sizeof(float);
      }
      if( fwrite(vptr, bytes, 2*nfc*K*LZ, ofs) != 2*nfc*K*LZ ) {
        fprintf(stderr, "Error, could not write proper amount of data\n");
        exit(8);
      }

      if(write_ascii) {
        for(x3=0; x3<LZ; x3++) {
          count = -1;
          for(j=0; j<nfc; j++) {
          for(i=0; i<K; i++) {
            count++;
            /* shift is a size_t, hence %zu */
            if(prec==64) {
              fprintf(ofs2, "%3d%3d%3d%3d%3d%3d%6zu%25.16e%25.16e\n", 
                j, i, timeslice, x1, x2, x3, shift, 
                ((double*)cconn)[(shift+x3)*2*nfc*K+2*count], ((double*)cconn)[(shift+x3)*2*nfc*K+2*count+1]);
            } else {
              fprintf(ofs2, "%3d%3d%3d%3d%3d%3d%6zu%16.7e%16.7e\n", 
                j, i, timeslice, x1, x2, x3, shift, 
                ((float*)cconn)[(shift+x3)*2*nfc*K+2*count], ((float*)cconn)[(shift+x3)*2*nfc*K+2*count+1]);
            }
          }}
        }
      }  // of if write_ascii
    }}

    /* the files are opened by every process in every iteration, so they are
     * closed unconditionally as well; otherwise one FILE leaks per timeslice */
    fclose(ofs);
    ofs = NULL;
    if(ofs2 != NULL) {
      fclose(ofs2);
      ofs2 = NULL;
    }

    retime = (double)clock() / CLOCKS_PER_SEC;
    if(g_cart_id==0) fprintf(stdout, "# time to write LL contractions: %e seconds\n", retime-ratime);


  }  // of loop on timeslices

  if(g_cart_id==0) fprintf(stdout, "# finished LL contractions\n");

  /**************************************************
   * free the allocated memory, finalize 
   **************************************************/
  if(no_fields>0) {
    if(prec==64) {
      for(i=0; i<no_fields; i++) free( ((double**)spinor_field)[i]);
    } else {
      for(i=0; i<no_fields; i++) free( ((float**)spinor_field)[i]);
    }

    free(spinor_field); 
  }
  free(checksum);
  free_geometry(); 
  free(cconn);
  free(buffer); 
#ifdef MPI
  MPI_Finalize();
#endif

  fprintf(stdout, "\n# [ll_conn] %s# [ll_conn] end of run\n", ctime(&g_the_time));
  fflush(stdout);
  fprintf(stderr, "\n# [ll_conn] %s# [ll_conn] end of run\n", ctime(&g_the_time));
  fflush(stderr);

  return(0);

}
Example #29
0
int main(int argc, char **argv) {
  
  const int n_c=3;
  const int n_s=4;
  const char outfile_prefix[] = "delta_pp_2pt_v4";

  int c, i, icomp;
  int filename_set = 0;
  int append, status;
  int l_LX_at, l_LXstart_at;
  int ix, it, iix, x1,x2,x3;
  int ir, ir2, is;
  int VOL3;
  int do_gt=0;
  int dims[3];
  double *connt=NULL;
  spinor_propagator_type *connq=NULL;
  int verbose = 0;
  int sx0, sx1, sx2, sx3;
  int write_ascii=0;
  int fermion_type = 1;  // Wilson fermion type
  int pos;
  char filename[200], contype[200], gauge_field_filename[200];
  double ratime, retime;
  //double plaq_m, plaq_r;
  double *work=NULL;
  fermion_propagator_type *fp1=NULL, *fp2=NULL, *fp3=NULL, *uprop=NULL, *dprop=NULL, *fpaux=NULL;
  spinor_propagator_type *sp1=NULL, *sp2=NULL;
  double q[3], phase, *gauge_trafo=NULL;
  complex w, w1;
  size_t items, bytes;
  FILE *ofs;
  int timeslice;
  DML_Checksum ildg_gauge_field_checksum, *spinor_field_checksum=NULL, connq_checksum;
  uint32_t nersc_gauge_field_checksum;
  int threadid, nthreads;

/*******************************************************************
 * Gamma components for the Delta:
 *                                                                 */
  const int num_component = 4;
  int gamma_component[2][4] = { {0, 1, 2, 3},
                                {0, 1, 2, 3} };
  double gamma_component_sign[4] = {+1.,+1.,-1.,+1.};
/*
 *******************************************************************/
  fftw_complex *in=NULL;
#ifdef MPI
   fftwnd_mpi_plan plan_p;
#else
   fftwnd_plan plan_p;
#endif 

#ifdef MPI
  MPI_Status status;
#endif

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "ah?vgf:F:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'a':
      write_ascii = 1;
      fprintf(stdout, "# [] will write in ascii format\n");
      break;
    case 'F':
      if(strcmp(optarg, "Wilson") == 0) {
        fermion_type = _WILSON_FERMION;
      } else if(strcmp(optarg, "tm") == 0) {
        fermion_type = _TM_FERMION;
      } else {
        fprintf(stderr, "[] Error, unrecognized fermion type\n");
        exit(145);
      }
      fprintf(stdout, "# [] will use fermion type %s ---> no. %d\n", optarg, fermion_type);
      break;
    case 'g':
      do_gt = 1;
      fprintf(stdout, "# [] will perform gauge transform\n");
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n");
    usage();
  }

#ifdef OPENMP
  omp_set_num_threads(g_num_threads);
#else
  fprintf(stdout, "[delta_pp_2pt_v4] Warning, resetting global thread number to 1\n");
  g_num_threads = 1;
#endif

  /* initialize MPI parameters */
  mpi_init(argc, argv);

#ifdef OPENMP
  status = fftw_threads_init();
  if(status != 0) {
    fprintf(stderr, "\n[] Error from fftw_init_threads; status was %d\n", status);
    exit(120);
  }
#endif

  /******************************************************
   *
   ******************************************************/
  VOL3 = LX*LY*LZ;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
  fprintf(stdout, "# [%2d] parameters:\n"\
		  "# [%2d] l_LX_at      = %3d\n"\
		  "# [%2d] l_LXstart_at = %3d\n"\
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
    exit(1);
  }

  geometry();

  if(N_Jacobi>0) {

    // alloc the gauge field
    alloc_gauge_field(&g_gauge_field, VOL3);
    switch(g_gauge_file_format) {
      case 0:
        sprintf(gauge_field_filename, "%s.%.4d", gaugefilename_prefix, Nconf);
        break;
      case 1:
        sprintf(gauge_field_filename, "%s.%.5d", gaugefilename_prefix, Nconf);
        break;
    }
  } else {
    g_gauge_field = NULL;
  }


  /*********************************************************************
   * gauge transformation
   *********************************************************************/
  if(do_gt) { init_gauge_trafo(&gauge_trafo, 1.); }

  // determine the source location
  sx0 = g_source_location/(LX*LY*LZ)-Tstart;
  sx1 = (g_source_location%(LX*LY*LZ)) / (LY*LZ);
  sx2 = (g_source_location%(LY*LZ)) / LZ;
  sx3 = (g_source_location%LZ);
//  g_source_time_slice = sx0;
  fprintf(stdout, "# [] source location %d = (%d,%d,%d,%d)\n", g_source_location, sx0, sx1, sx2, sx3);

  // allocate memory for the spinor fields
  g_spinor_field = NULL;
  no_fields = n_s*n_c;
//  if(fermion_type == _TM_FERMION) {
//    no_fields *= 2;
//  }
  if(N_Jacobi>0) no_fields++;
  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  for(i=0; i<no_fields-1; i++) alloc_spinor_field(&g_spinor_field[i], VOL3);
  alloc_spinor_field(&g_spinor_field[no_fields-1], VOL3);
  work = g_spinor_field[no_fields-1];

  spinor_field_checksum = (DML_Checksum*)malloc(no_fields * sizeof(DML_Checksum) );
  if(spinor_field_checksum == NULL ) {
    fprintf(stderr, "[] Error, could not alloc checksums for spinor fields\n");
    exit(73);
  }

  // allocate memory for the contractions
  items = 4* num_component*T;
  bytes = sizeof(double);
  connt = (double*)malloc(items*bytes);
  if(connt == NULL) {
    fprintf(stderr, "\n[] Error, could not alloc connt\n");
    exit(2);
  }
  for(ix=0; ix<items; ix++) connt[ix] = 0.;

  items = num_component * (size_t)VOL3;
  connq = create_sp_field( items );
  if(connq == NULL) {
    fprintf(stderr, "\n[] Error, could not alloc connq\n");
    exit(2);
  }


  /******************************************************
   * initialize FFTW
   ******************************************************/
  items = 2 * num_component * g_sv_dim * g_sv_dim * VOL3;
  bytes = sizeof(double);
  in  = (fftw_complex*)malloc(num_component*g_sv_dim*g_sv_dim*VOL3*sizeof(fftw_complex));
  if(in == NULL) {
    fprintf(stderr, "[] Error, could not malloc in for FFTW\n");
    exit(155);
  }
  dims[0]=LX; dims[1]=LY; dims[2]=LZ;
  //plan_p = fftwnd_create_plan(3, dims, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE);
  plan_p = fftwnd_create_plan_specific(3, dims, FFTW_FORWARD, FFTW_MEASURE, in, num_component*g_sv_dim*g_sv_dim, (fftw_complex*)( connq[0][0] ), num_component*g_sv_dim*g_sv_dim);

  uprop = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) );
  fp1   = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) );
  fp2   = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) );
  fp3   = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) );
  fpaux = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) );
  if(uprop==NULL || fp1==NULL || fp2==NULL || fp3==NULL || fpaux==NULL ) {
    fprintf(stderr, "[] Error, could not alloc fermion propagator points\n");
    exit(57);
  }
  sp1 = (spinor_propagator_type*)malloc(g_num_threads * sizeof(spinor_propagator_type) ); 
  sp2 = (spinor_propagator_type*)malloc(g_num_threads * sizeof(spinor_propagator_type) ); 
  if(sp1==NULL || sp2==NULL) {
    fprintf(stderr, "[] Error, could not alloc spinor propagator points\n");
    exit(59);
  }
  for(i=0;i<g_num_threads;i++) { create_fp(uprop+i); }
  for(i=0;i<g_num_threads;i++) { create_fp(fp1+i); }
  for(i=0;i<g_num_threads;i++) { create_fp(fp2+i); }
  for(i=0;i<g_num_threads;i++) { create_fp(fp3+i); }
  for(i=0;i<g_num_threads;i++) { create_fp(fpaux+i); }
  for(i=0;i<g_num_threads;i++) { create_sp(sp1+i); }
  for(i=0;i<g_num_threads;i++) { create_sp(sp2+i); }

  /******************************************************
   * loop on timeslices
   ******************************************************/
  for(timeslice=0; timeslice<T; timeslice++) {
    append = (int)( timeslice != 0 );

    // read timeslice of the gauge field
    if( N_Jacobi>0) {
      switch(g_gauge_file_format) {
        case 0:
          status = read_lime_gauge_field_doubleprec_timeslice(g_gauge_field, gauge_field_filename, timeslice, &ildg_gauge_field_checksum);
          break;
        case 1:
          status = read_nersc_gauge_field_timeslice(g_gauge_field, gauge_field_filename, timeslice, &nersc_gauge_field_checksum);
          break;
      }
      if(status != 0) {
        fprintf(stderr, "[] Error, could not read gauge field\n");
        exit(21);
      }

#ifdef OPENMP
      status = APE_Smearing_Step_Timeslice_threads(g_gauge_field, N_ape, alpha_ape);
#else
      for(i=0; i<N_ape; i++) { status = APE_Smearing_Step_Timeslice(g_gauge_field, alpha_ape); }
#endif

    }

    // read timeslice of the 12 up-type propagators and smear them
    for(is=0;is<n_s*n_c;is++) {
      if(do_gt == 0) {
        sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix, Nconf, sx0, sx1, sx2, sx3, is);
        status = read_lime_spinor_timeslice(g_spinor_field[is], timeslice, filename, 0, spinor_field_checksum+is);
        if(status != 0) {
          fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename);
          exit(102);
        }
        if(N_Jacobi > 0) {
          fprintf(stdout, "# [] Jacobi smearing propagator no. %d with paramters N_Jacobi=%d, kappa_Jacobi=%f\n",
              is, N_Jacobi, kappa_Jacobi);
#ifdef OPENMP
          Jacobi_Smearing_Step_one_Timeslice_threads(g_gauge_field, g_spinor_field[is], work, N_Jacobi, kappa_Jacobi);
#else
          for(c=0; c<N_Jacobi; c++) {
            Jacobi_Smearing_Step_one_Timeslice(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi);
          }
#endif
        }
      } else {  // of if do_gt == 0
        // apply gt
        apply_gt_prop(gauge_trafo, g_spinor_field[is], is/n_c, is%n_c, 4, filename_prefix, g_source_location);
      } // of if do_gt == 0
    }

    /******************************************************
     * contractions
     ******************************************************/
#ifdef OPENMP
  omp_set_num_threads(g_num_threads);
#pragma omp parallel private (ix,icomp,threadid) \
    firstprivate (fermion_type,gamma_component,num_component,connq,\
        gamma_component_sign,VOL3,g_spinor_field,fp1,fp2,fp3,fpaux,uprop,sp1,sp2)
{
    threadid = omp_get_thread_num();
#else
    threadid = 0;
#endif
    for(ix=threadid; ix<VOL3; ix+=g_num_threads)
    {
      // assign the propagators
      _assign_fp_point_from_field(uprop[threadid], g_spinor_field, ix);
      if(fermion_type == _TM_FERMION) {
        _fp_eq_rot_ti_fp(fp1[threadid], uprop[threadid], +1, fermion_type, fp2[threadid]);
        _fp_eq_fp_ti_rot(uprop[threadid], fp1[threadid], +1, fermion_type, fp2[threadid]);
      }

      for(icomp=0; icomp<num_component; icomp++) {

        _sp_eq_zero( connq[ix*num_component+icomp]);

        /******************************************************
         * prepare propagators
         ******************************************************/
        // fp1[threadid] = C Gamma_1 x S_u = g0 g2 Gamma_1 S_u
        _fp_eq_zero(fp1[threadid]);
        _fp_eq_zero(fpaux[threadid]);
        _fp_eq_gamma_ti_fp(fp1[threadid], gamma_component[0][icomp], uprop[threadid]);
        _fp_eq_gamma_ti_fp(fpaux[threadid], 2, fp1[threadid]);
        _fp_eq_gamma_ti_fp(fp1[threadid], 0, fpaux[threadid]);
        // fp2[threadid] = C Gamma_1 x S_u x C Gamma_2
        _fp_eq_zero(fp2[threadid]);
        _fp_eq_zero(fpaux[threadid]);
        _fp_eq_fp_ti_gamma(fp2[threadid], 0, fp1[threadid]);
        _fp_eq_fp_ti_gamma(fpaux[threadid], 2, fp2[threadid]);
        _fp_eq_fp_ti_gamma(fp2[threadid], gamma_component[1][icomp], fpaux[threadid]);
        // fp3[threadid] = S_u x C Gamma_2 = S_u g0 g2 Gamma_2
        _fp_eq_zero(fp3[threadid]);
        _fp_eq_zero(fpaux[threadid]);
        _fp_eq_fp_ti_gamma(fp3[threadid], 0, uprop[threadid]);
        _fp_eq_fp_ti_gamma(fpaux[threadid], 2, fp3[threadid]);
        _fp_eq_fp_ti_gamma(fp3[threadid], gamma_component[1][icomp], fpaux[threadid]);


        /******************************************************
         * contractions
         ******************************************************/
        // (1)
        // reduce
        _fp_eq_zero(fpaux[threadid]);
        _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp1[threadid], uprop[threadid]);
        // reduce to spin propagator
        _sp_eq_zero( sp1[threadid] );
        _sp_eq_fp_del_contract23_fp(sp1[threadid], fp3[threadid], fpaux[threadid]);
        // (2)
        // reduce to spin propagator
        _sp_eq_zero( sp2[threadid] );
        _sp_eq_fp_del_contract24_fp(sp2[threadid], fp3[threadid], fpaux[threadid]);
        // add and assign
        _sp_pl_eq_sp(sp1[threadid], sp2[threadid]);
        _sp_eq_sp_ti_re(sp2[threadid], sp1[threadid], -gamma_component_sign[icomp]);
        _sp_eq_sp( connq[ix*num_component+icomp], sp2[threadid]);

        // (3)
        // reduce
        _fp_eq_zero(fpaux[threadid]);
        _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp2[threadid], uprop[threadid]);
        // reduce to spin propagator
        _sp_eq_zero( sp1[threadid] );
        _sp_eq_fp_del_contract23_fp(sp1[threadid], uprop[threadid], fpaux[threadid]);
        // (4)
        // reduce
        _fp_eq_zero(fpaux[threadid]);
        _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp1[threadid], fp3[threadid]);
        // reduce to spin propagator
        _sp_eq_zero( sp2[threadid] );
        _sp_eq_fp_del_contract24_fp(sp2[threadid], uprop[threadid], fpaux[threadid]);
        // add and assign
        _sp_pl_eq_sp(sp1[threadid], sp2[threadid]);
        _sp_eq_sp_ti_re(sp2[threadid], sp1[threadid], -gamma_component_sign[icomp]);
        _sp_pl_eq_sp( connq[ix*num_component+icomp], sp2[threadid]);

        // (5)
        // reduce
        _fp_eq_zero(fpaux[threadid]);
        _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp2[threadid], uprop[threadid]);
        // reduce to spin propagator
        _sp_eq_zero( sp1[threadid] );
        _sp_eq_fp_del_contract34_fp(sp1[threadid], uprop[threadid], fpaux[threadid]);
        // (6)
        // reduce
        _fp_eq_zero(fpaux[threadid]);
        _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp1[threadid], fp3[threadid]);
        // reduce to spin propagator
        _sp_eq_zero( sp2[threadid] );
        _sp_eq_fp_del_contract34_fp(sp2[threadid], uprop[threadid], fpaux[threadid]);
        // add and assign
        _sp_pl_eq_sp(sp1[threadid], sp2[threadid]);
        _sp_eq_sp_ti_re(sp2[threadid], sp1[threadid], -gamma_component_sign[icomp]);
        _sp_pl_eq_sp( connq[ix*num_component+icomp], sp2[threadid]);
      }  // of icomp

    }    // of ix
#ifdef OPENMP
}
#endif

    /***********************************************
     * finish calculation of connq
     ***********************************************/
    if(g_propagator_bc_type == 0) {
      // multiply with phase factor
      fprintf(stdout, "# [] multiplying timeslice %d with boundary phase factor\n", timeslice);
      ir = (timeslice - sx0 + T_global) % T_global;
      w1.re = cos( 3. * M_PI*(double)ir / (double)T_global );
      w1.im = sin( 3. * M_PI*(double)ir / (double)T_global );
      for(ix=0;ix<num_component*VOL3;ix++) {
        _sp_eq_sp(sp1[0], connq[ix] );
        _sp_eq_sp_ti_co( connq[ix], sp1[0], w1);
      }
    } else if (g_propagator_bc_type == 1) {
      // multiply with step function
      if(timeslice < sx0) {
        fprintf(stdout, "# [] multiplying timeslice %d with boundary step function\n", timeslice);
        for(ix=0;ix<num_component*VOL3;ix++) {
          _sp_eq_sp(sp1[0], connq[ix] );
          _sp_eq_sp_ti_re( connq[ix], sp1[0], -1.);
        }
      }
    }
  
    if(write_ascii) {
      sprintf(filename, "%s_x.%.4d.t%.2dx%.2dy%.2dz%.2d.ascii", outfile_prefix, Nconf, sx0, sx1, sx2, sx3);
      write_contraction2( connq[0][0], filename, num_component*g_sv_dim*g_sv_dim, VOL3, 1, append);
    }

    /******************************************************************
     * Fourier transform
     ******************************************************************/
    items =  2 * num_component * g_sv_dim * g_sv_dim * VOL3;
    bytes = sizeof(double);

    memcpy(in, connq[0][0], items * bytes);
    ir = num_component * g_sv_dim * g_sv_dim;
#ifdef OPENMP
    fftwnd_threads(g_num_threads, plan_p, ir, in, ir, 1, (fftw_complex*)(connq[0][0]), ir, 1);
#else
    fftwnd(plan_p, ir, in, ir, 1, (fftw_complex*)(connq[0][0]), ir, 1);
#endif

    // add phase factor from the source location
    iix = 0;
    for(x1=0;x1<LX;x1++) {
      q[0] = (double)x1 / (double)LX;
    for(x2=0;x2<LY;x2++) {
      q[1] = (double)x2 / (double)LY;
    for(x3=0;x3<LZ;x3++) {
      q[2] = (double)x3 / (double)LZ;
      phase = 2. * M_PI * ( q[0]*sx1 + q[1]*sx2 + q[2]*sx3 );
      w1.re = cos(phase);
      w1.im = sin(phase);

      for(icomp=0; icomp<num_component; icomp++) {
        _sp_eq_sp(sp1[0], connq[iix] );
        _sp_eq_sp_ti_co( connq[iix], sp1[0], w1) ;
        iix++; 
      }
    }}}  // of x3, x2, x1

    // write to file
    sprintf(filename, "%s_q.%.4d.t%.2dx%.2dy%.2dz%.2d", outfile_prefix, Nconf, sx0, sx1, sx2, sx3);
    sprintf(contype, "2-pt. function, (t,q_1,q_2,q_3)-dependent, source_timeslice = %d", sx0);
    write_lime_contraction_timeslice(connq[0][0], filename, 64, num_component*g_sv_dim*g_sv_dim, contype, Nconf, 0, &connq_checksum, timeslice);

    if(write_ascii) {
      strcat(filename, ".ascii");
      write_contraction2(connq[0][0],filename, num_component*g_sv_dim*g_sv_dim, VOL3, 1, append);
    }


    /***********************************************
     * calculate connt
     ***********************************************/
    for(icomp=0;icomp<num_component; icomp++) {
      // fwd
      _sp_eq_sp(sp1[0], connq[icomp]);
      _sp_eq_gamma_ti_sp(sp2[0], 0, sp1[0]);
      _sp_pl_eq_sp(sp1[0], sp2[0]);
      _co_eq_tr_sp(&w, sp1[0]);
      connt[2*(icomp*T + timeslice)  ] = w.re * 0.25;
      connt[2*(icomp*T + timeslice)+1] = w.im * 0.25;
      // bwd
      _sp_eq_sp(sp1[0], connq[icomp]);
      _sp_eq_gamma_ti_sp(sp2[0], 0, sp1[0]);
      _sp_mi_eq_sp(sp1[0], sp2[0]);
      _co_eq_tr_sp(&w, sp1[0]);
      connt[2*(icomp*T+timeslice + num_component*T)  ] = w.re * 0.25;
      connt[2*(icomp*T+timeslice + num_component*T)+1] = w.im * 0.25;
    }

  }  // of loop on timeslice



  // write connt
  sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.fw", outfile_prefix, Nconf, sx0, sx1, sx2, sx3);
  ofs = fopen(filename, "w");
  if(ofs == NULL) {
    fprintf(stderr, "[] Error, could not open file %s for writing\n", filename);
    exit(3);
  }
  fprintf(ofs, "#%12.8f%3d%3d%3d%3d%8.4f%6d\n", g_kappa, T_global, LX, LY, LZ, g_mu, Nconf);

  for(icomp=0; icomp<num_component; icomp++) {
    ir = sx0;
    fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], 0, connt[2*(icomp*T+ir)], 0., Nconf);
    for(it=1;it<T/2;it++) {
      ir  = ( it + sx0 ) % T_global;
      ir2 = ( (T_global - it) + sx0 ) % T_global;
      fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(icomp*T+ir)], connt[2*(icomp*T+ir2)], Nconf);
    }
    ir = ( it + sx0 ) % T_global;
    fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(icomp*T+ir)], 0., Nconf);
  }
  fclose(ofs);

  sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.bw", outfile_prefix, Nconf, sx0, sx1, sx2, sx3);
  ofs = fopen(filename, "w");
  if(ofs == NULL) {
    fprintf(stderr, "[] Error, could not open file %s for writing\n", filename);
    exit(3);
  }
  fprintf(ofs, "#%12.8f%3d%3d%3d%3d%8.4f%6d\n", g_kappa, T_global, LX, LY, LZ, g_mu, Nconf);

  for(icomp=0; icomp<num_component; icomp++) {
    ir = sx0;
    fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], 0, connt[2*(num_component*T+icomp*T+ir)], 0., Nconf);
    for(it=1;it<T/2;it++) {
      ir  = ( it + sx0 ) % T_global;
      ir2 = ( (T_global - it) + sx0 ) % T_global;
      fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(num_component*T+icomp*T+ir)], connt[2*(num_component*T+icomp*T+ir2)], Nconf);
    }
    ir = ( it + sx0 ) % T_global;
    fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(num_component*T+icomp*T+ir)], 0., Nconf);
  }
  fclose(ofs);

  /***********************************************
   * free the allocated memory, finalize
   ***********************************************/
  free_geometry();
  if(connt!= NULL) free(connt);
  if(connq!= NULL) free(connq);
  if(gauge_trafo != NULL) free(gauge_trafo);

  if(g_spinor_field!=NULL) {
    for(i=0; i<no_fields; i++) free(g_spinor_field[i]);
    free(g_spinor_field); g_spinor_field=(double**)NULL;
  }
  if(spinor_field_checksum !=NULL) free(spinor_field_checksum);
  if(g_gauge_field != NULL) free(g_gauge_field);

  for(i=0;i<g_num_threads;i++) { free_fp(uprop+i); }
  for(i=0;i<g_num_threads;i++) { free_fp(fp1+i); }
  for(i=0;i<g_num_threads;i++) { free_fp(fp2+i); }
  for(i=0;i<g_num_threads;i++) { free_fp(fp3+i); }
  for(i=0;i<g_num_threads;i++) { free_fp(fpaux+i); }
  for(i=0;i<g_num_threads;i++) { free_sp(sp1+i); }
  for(i=0;i<g_num_threads;i++) { free_sp(sp2+i); }
  if(uprop!=NULL) free(uprop);
  if(fp1!=NULL) free(fp1);
  if(fp2!=NULL) free(fp2);
  if(fp3!=NULL) free(fp3);
  if(fpaux!=NULL) free(fpaux);
  if(sp1!=NULL) free(sp1);
  if(sp2!=NULL) free(sp2);

  free(in);
  fftwnd_destroy_plan(plan_p);

  g_the_time = time(NULL);
  fprintf(stdout, "# [] %s# [] end fo run\n", ctime(&g_the_time));
  fflush(stdout);
  fprintf(stderr, "# [] %s# [] end fo run\n", ctime(&g_the_time));
  fflush(stderr);

#ifdef MPI
  MPI_Finalize();
#endif
  return(0);
}
Example #30
0
/*
 * Test driver: reads several contraction data files and prints, for each
 * direction mu = 0..3, the real and imaginary part found at the source
 * location, so that the output of different programs can be compared side
 * by side on stdout.
 *
 * Command line options:
 *   -v         set the verbose flag
 *   -f <file>  name of the input file (default: "cvc.input.test")
 *   -h, -?     call usage()
 *
 * Returns 0 at the end of the run. Exits with status 1 if the input file
 * name does not fit into the local buffer or init_geometry() fails, and
 * with status 102 if the data buffer cannot be allocated.
 */
int main(int argc, char **argv) {
  
  int c, mu;
  int filename_set = 0;
  int sl0, sl1, sl2, sl3;
  double *disc;
  /* one (re, im) pair per direction mu for each data set being compared */
  double vp1[8], vp2[8], vp3[8], vp4[8], vp5[8];
  char filename[200];

  while ((c = getopt(argc, argv, "h?vf:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'f':
      /* optarg comes straight from the command line; refuse names that
       * would overflow the fixed-size buffer instead of copying blindly */
      if(strlen(optarg) >= sizeof(filename)) {
        fprintf(stderr, "ERROR, input filename too long (max. %d characters)\n",
            (int)(sizeof(filename)-1));
        exit(1);
      }
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /**************************************
   * set the default values, read input
   **************************************/
  if(filename_set==0) strcpy(filename, "cvc.input.test");
  fprintf(stdout, "# Reading test input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }
  if(g_kappa == 0.) {
    if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.\n");
    usage();
  }

  /* initialize MPI parameters */
  mpi_init(argc, argv);

  T = T_global;

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
    exit(1);
  }

  geometry();

  /********************************
   * the source location:
   * decompose the linear index g_source_location into its
   * four coordinates (slowest index first)
   ********************************/
  sl0 = g_source_location/(LX*LY*LZ);
  sl1 = ( g_source_location%(LX*LY*LZ) ) / (LY*LZ);
  sl2 = ( g_source_location%(LY*LZ) ) / (LZ);
  sl3 = g_source_location%LZ;
  fprintf(stdout, "# global sl = (%d, %d, %d, %d)\n", sl0, sl1, sl2, sl3);
  
  /* buffer shared by all read calls below */
  if( (disc = (double*)malloc(32*VOLUME*sizeof(double))) == (double*)NULL) {
    fprintf(stderr, "ERROR, could not allocate memory for disc\n");
    exit(102);
  }
   
  /*******************************************************************
   * (1) comparison of results from
   *   - avc_disc_stochastic
   *   - avc_disc_hpe and avc_disc_hpe5
   *   - vp_disc_hpe_loops_red/vp_disc_hpe_stoch to 3rd and 5th order
   *
   * NOTE(review): the return values of read_contraction and
   * read_lime_contraction are not checked anywhere in this function;
   * if a file is missing, stale data from the previous read is printed.
   *******************************************************************/

  sprintf(filename, "outcvc_X.%.4d.%.4d", Nconf, Nsave);
  fprintf(stdout, "\n# Reading avc_disc_stochastic data from file %s\n", filename);
  read_contraction(disc, NULL, filename, 4);
  for(mu=0; mu<4; mu++) {
    /* NOTE(review): the divisor 60 is presumably a sample count used as
     * normalization -- TODO confirm */
    vp1[2*mu  ] = disc[_GWI(mu,g_source_location,VOLUME)  ] / 60.;
    vp1[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1] / 60.;
  }


  sprintf(filename, "cvc_hpe_X.%.4d.%.4d", Nconf, Nsave);
  fprintf(stdout, "\n# Reading avc_disc_hpe data from file %s\n", filename);
  read_contraction(disc, NULL, filename, 4);
  for(mu=0; mu<4; mu++) {
    vp2[2*mu  ] = disc[_GWI(mu,g_source_location,VOLUME)  ] / 60.;
    vp2[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1] / 60.;
  }

  /* the remaining data sets of part (1) are read in lime format and
   * used without rescaling */
  sprintf(filename, "cvc_hpe5_X.%.4d.%.4d", Nconf, Nsave);
  fprintf(stdout, "\n# Reading avc_disc_hpe5 data from file %s\n", filename);
  read_lime_contraction(disc, filename, 4, 0);
  for(mu=0; mu<4; mu++) {
    vp3[2*mu  ] = disc[_GWI(mu,g_source_location,VOLUME)  ];
    vp3[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1];
  }

  sprintf(filename, "vp_disc_hpe03_X.%.4d.%.4d", Nconf, Nsave);
  fprintf(stdout, "\n# Reading vp_disc_hpe03 data from file %s\n", filename);
  read_lime_contraction(disc, filename, 4, 0);
  for(mu=0; mu<4; mu++) {
    vp4[2*mu  ] = disc[_GWI(mu,g_source_location,VOLUME)  ];
    vp4[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1];
  }

  sprintf(filename, "vp_disc_hpe05_X.%.4d.%.4d", Nconf, Nsave);
  fprintf(stdout, "\n# Reading vp_disc_hpe05 data from file %s\n", filename);
  read_lime_contraction(disc, filename, 4, 0);
  for(mu=0; mu<4; mu++) {
    vp5[2*mu  ] = disc[_GWI(mu,g_source_location,VOLUME)  ];
    vp5[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1];
  }

  /* one table per direction mu, one row per method */
  for(mu=0; mu<4; mu++) {
    
    fprintf(stdout, "\n#--------------------------------------------\n"\
      "# mu = %d\n", mu);
    fprintf(stdout, "%30s%30s%30s\n", "method", "real part", "imaginary part");
    fprintf(stdout, "%30s%30.16e%30.16e\n", "avc_disc_stochastic", vp1[2*mu], vp1[2*mu+1]);
    fprintf(stdout, "%30s%30.16e%30.16e\n", "avc_disc_hpe", vp2[2*mu], vp2[2*mu+1]);
    fprintf(stdout, "%30s%30.16e%30.16e\n", "avc_disc_hpe5", vp3[2*mu], vp3[2*mu+1]);
    fprintf(stdout, "%30s%30.16e%30.16e\n", "vp_disc_hpe03", vp4[2*mu], vp4[2*mu+1]);
    fprintf(stdout, "%30s%30.16e%30.16e\n", "vp_disc_hpe05", vp5[2*mu], vp5[2*mu+1]);
  }

  fprintf(stdout, "\n#=======================================================\n");

  /*******************************************************************
   * (2) comparison of results from
   *   - lvc_disc_stochastic 
   *   - lvc_disc_hpe for 4th and 6th order
   *******************************************************************/
  
  sprintf(filename, "outlvc_X.%.4d.%.4d", Nconf, Nsave);
  fprintf(stdout, "\n# Reading lvc_disc_stochastic data from file %s\n", filename);
  read_contraction(disc, NULL, filename, 4);
  for(mu=0; mu<4; mu++) {
    /* same division by 60 as for the stochastic data in part (1) */
    vp1[2*mu  ] = disc[_GWI(mu,g_source_location,VOLUME)  ] / 60.;
    vp1[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1] / 60.;
  }

  sprintf(filename, "lvc_disc_hpe04_X.%.4d.%.4d", Nconf, Nsave);
  fprintf(stdout, "\n# Reading lvc_disc_hpe04 data from file %s\n", filename);
  read_lime_contraction(disc, filename, 4, 0);
  for(mu=0; mu<4; mu++) {
    vp2[2*mu  ] = disc[_GWI(mu,g_source_location,VOLUME)  ];
    vp2[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1];
  }

  sprintf(filename, "lvc_disc_hpe06_X.%.4d.%.4d", Nconf, Nsave);
  fprintf(stdout, "\n# Reading lvc_disc_hpe06 data from file %s\n", filename);
  read_lime_contraction(disc, filename, 4, 0);
  for(mu=0; mu<4; mu++) {
    vp3[2*mu  ] = disc[_GWI(mu,g_source_location,VOLUME)  ];
    vp3[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1];
  }

  for(mu=0; mu<4; mu++) {
    
    fprintf(stdout, "\n#--------------------------------------------\n"\
      "# mu = %d\n", mu);
    fprintf(stdout, "%30s%30s%30s\n", "method", "real part", "imaginary part");
    fprintf(stdout, "%30s%30.16e%30.16e\n", "lvc_disc_stochastic", vp1[2*mu], vp1[2*mu+1]);
    fprintf(stdout, "%30s%30.16e%30.16e\n", "lvc_disc_hpe04", vp2[2*mu], vp2[2*mu+1]);
    fprintf(stdout, "%30s%30.16e%30.16e\n", "lvc_disc_hpe06", vp3[2*mu], vp3[2*mu+1]);
  }

  fprintf(stdout, "\n#=======================================================\n");


  /*******************************************************************
   * (3) comparison of results from
   *   - vp_disc_hpe_mc1/2 for 3rd and 5th order
   *******************************************************************/
  sprintf(filename, "vp_disc_hpe-01_mc2_X.%.4d.%.4d", Nconf, Nsave);
  fprintf(stdout, "\n# Reading vp_disc_hpe-01_mc2 data from file %s\n", filename);
  read_lime_contraction(disc, filename, 4, 0);
  for(mu=0; mu<4; mu++) {
    vp1[2*mu  ] = disc[_GWI(mu,g_source_location,VOLUME)  ];
    vp1[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1];
  }

  sprintf(filename, "vp_disc_hpe03_mc2_X.%.4d.%.4d", Nconf, Nsave);
  fprintf(stdout, "\n# Reading vp_disc_hpe03_mc2 data from file %s\n", filename);
  read_lime_contraction(disc, filename, 4, 0);
  for(mu=0; mu<4; mu++) {
    vp2[2*mu  ] = disc[_GWI(mu,g_source_location,VOLUME)  ];
    vp2[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1];
  }

  sprintf(filename, "vp_disc_hpe05_mc2_X.%.4d.%.4d", Nconf, Nsave);
  fprintf(stdout, "\n# Reading vp_disc_hpe05_mc2 data from file %s\n", filename);
  read_lime_contraction(disc, filename, 4, 0);
  for(mu=0; mu<4; mu++) {
    vp3[2*mu  ] = disc[_GWI(mu,g_source_location,VOLUME)  ];
    vp3[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1];
  }


  for(mu=0; mu<4; mu++) {
    
    fprintf(stdout, "\n#--------------------------------------------\n"\
      "# mu = %d\n", mu);
    fprintf(stdout, "%30s%30s%30s\n", "method", "real part", "imaginary part");
    /* NOTE(review): the row labels below ("hpe00_mc", "..._mc") do not match
     * the file names read above ("hpe-01_mc2", "..._mc2") -- confirm which
     * naming is intended */
    fprintf(stdout, "%30s%30.16e%30.16e\n", "vp_disc_hpe00_mc", vp1[2*mu], vp1[2*mu+1]);
    fprintf(stdout, "%30s%30.16e%30.16e\n", "vp_disc_hpe03_mc", vp2[2*mu], vp2[2*mu+1]);
    fprintf(stdout, "%30s%30.16e%30.16e\n", "vp_disc_hpe05_mc", vp3[2*mu], vp3[2*mu+1]);
  }

  fprintf(stdout, "\n#=======================================================\n");

  /***********************************************
   * free the allocated memory, finalize 
   ***********************************************/
  free_geometry();
  free(disc);

#ifdef MPI
  MPI_Finalize();
#endif

  return(0);

}