int tmLQCD_read_gauge(const int nconfig) {
  char conf_filename[500];
  if(!tmLQCD_invert_initialised) {
    fprintf(stderr, "tmLQCD_read_gauge: tmLQCD_inver_init must be called first. Aborting...\n");
    return(-1);
  }

  if(strcmp(gauge_input_filename, "identity") == 0) {
    if(g_cart_id == 0) fprintf(stdout, "# [invert] initializing unit gauge field\n");
    unit_g_gauge_field();
  } else {

    sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nconfig);
    int j=0;
    if (g_cart_id == 0) {
      printf("#\n# Trying to read gauge field from file %s.\n",
  	   conf_filename);
      fflush(stdout);
    }
    if( (j = read_gauge_field(conf_filename,g_gauge_field)) !=0) {
      fprintf(stderr, "tmLQCD_read_gauge: Error %d while reading gauge field from %s\n ...\n", j, conf_filename);
      return(-1);
    }
  }

  if (g_cart_id == 0) {
    printf("# Finished reading gauge field.\n");
    fflush(stdout);
  }
#ifdef MPI
  xchange_gauge(g_gauge_field);
#endif
  return(0);
}
Пример #2
0
void SU3Test(int argc, char **argv) {

  for (int i =1;i < argc; i++){    
    if(process_command_line_option(argc, argv, &i) == 0){
      continue;
    }  
    
    fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
    usage(argv);
  }

  init();

  if (strcmp(latfile,"")) {  // load in the command line supplied gauge field
    read_gauge_field(latfile, gauge, param.cpu_prec, param.X, argc, argv);
    construct_gauge_field((void**)gauge, 2, param.cpu_prec, &param);
  } else { // generate a random SU(3) field
    printf("Randomizing fields...");
    construct_gauge_field((void**)gauge, 1, param.cpu_prec, &param);
    printf("done.\n");
  }

  loadGaugeQuda(gauge, &param);
  saveGaugeQuda(new_gauge, &param);

  check_gauge(gauge, new_gauge, 1e-3, param.cpu_prec);

  end();
}
Пример #3
0
void  load_config_from_file(su3 **in, char * filename) 
{
  int x, mu;
  su3 ** temp_su3;
  su3 * tsu3;
  temp_su3 = (su3**)calloc(VOLUME, sizeof(su3*));
  tsu3 = (su3*)calloc(4*VOLUME, sizeof(su3));
  temp_su3[0] = tsu3;
  for(x = 0; x < VOLUME; x++) {
    temp_su3[x] = temp_su3[x]+4;
  }

  for(x = 0; x < VOLUME; x++) {
    for(mu = 0; mu < 4; mu++) {
      _su3_assign(temp_su3[x][mu], g_gauge_field[x][mu]);
    }
  }
  read_gauge_field(filename);
  for(x = 0; x < VOLUME; x++) {
    for(mu = 0; mu < 4; mu++) {
      _su3_assign((in[x][mu]), g_gauge_field[x][mu]);
      _su3_assign(g_gauge_field[x][mu], temp_su3[x][mu]);
    }
  }
  free(temp_su3);
  free(tsu3);
}
Пример #4
0
void SU3Test(int argc, char **argv) {

  init();

  char *latfile = "";//"16_64.lat";
  if (strcmp(latfile,"")) {  // load in the command line supplied gauge field
    read_gauge_field(latfile, gauge, param.cpu_prec, param.X, argc, argv);
    construct_gauge_field((void**)gauge, 2, param.cpu_prec, &param);
  } else { // generate a random SU(3) field
    printf("Randomizing fields...");
    construct_gauge_field((void**)gauge, 1, param.cpu_prec, &param);
    printf("done.\n");
  }

  loadGaugeQuda(gauge, &param);
  saveGaugeQuda(new_gauge, &param);

  check_gauge(gauge, new_gauge, 1e-3, param.cpu_prec);

  end();
}
Пример #5
0
int main(int argc,char *argv[])
{
  int j,j_max,k,k_max = 2;
  paramsXlfInfo *xlfInfo;
  int ix, n, *nn,*mm,i;
  double delta, deltamax;
  spinor rsp;
  int status = 0;
#ifdef MPI
  DUM_DERI = 6;
  DUM_SOLVER = DUM_DERI+2;
  DUM_MATRIX = DUM_SOLVER+6;
  NO_OF_SPINORFIELDS = DUM_MATRIX+2;

  MPI_Init(&argc, &argv);
#endif
  g_rgi_C1 = 1.; 
  
  /* Read the input file */
  read_input("hopping_test.input");
  
  tmlqcd_mpi_init(argc, argv);
  
  if(g_proc_id==0) {
#ifdef SSE
    printf("# The code was compiled with SSE instructions\n");
#endif
#ifdef SSE2
    printf("# The code was compiled with SSE2 instructions\n");
#endif
#ifdef SSE3
    printf("# The code was compiled with SSE3 instructions\n");
#endif
#ifdef P4
    printf("# The code was compiled for Pentium4\n");
#endif
#ifdef OPTERON
    printf("# The code was compiled for AMD Opteron\n");
#endif
#ifdef _GAUGE_COPY
    printf("# The code was compiled with -D_GAUGE_COPY\n");
#endif
#ifdef BGL
    printf("# The code was compiled for Blue Gene/L\n");
#endif
#ifdef BGP
    printf("# The code was compiled for Blue Gene/P\n");
#endif
#ifdef _USE_HALFSPINOR
    printf("# The code was compiled with -D_USE_HALFSPINOR\n");
#endif    
#ifdef _USE_SHMEM
    printf("# the code was compiled with -D_USE_SHMEM\n");
#  ifdef _PERSISTENT
    printf("# the code was compiled for persistent MPI calls (halfspinor only)\n");
#  endif
#endif
#ifdef _INDEX_INDEP_GEOM
    printf("# the code was compiled with index independent geometry\n");
#endif
#ifdef MPI
#  ifdef _NON_BLOCKING
    printf("# the code was compiled for non-blocking MPI calls (spinor and gauge)\n");
#  endif
#  ifdef _USE_TSPLITPAR
    printf("# the code was compiled with tsplit parallelization\n");
#  endif
#endif
    printf("\n");
    fflush(stdout);
  }

  
#ifdef _GAUGE_COPY
  init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1);
#else
  init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0);
#endif
  init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand);

  if(even_odd_flag) {
    j = init_spinor_field(VOLUMEPLUSRAND/2, 2*k_max+1);
  }
  else {
    j = init_spinor_field(VOLUMEPLUSRAND, 2*k_max);
  }

  if ( j!= 0) {
    fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n");
    exit(0);
  }
  j = init_moment_field(VOLUME, VOLUMEPLUSRAND);
  if ( j!= 0) {
    fprintf(stderr, "Not enough memory for moment fields! Aborting...\n");
    exit(0);
  }
  
  if(g_proc_id == 0) {
    fprintf(stdout,"The number of processes is %d \n",g_nproc);
    printf("# The lattice size is %d x %d x %d x %d\n",
	   (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(g_nproc_z*LZ));
    printf("# The local lattice size is %d x %d x %d x %d\n", 
	   (int)(T), (int)(LX), (int)(LY),(int) LZ);
    if(even_odd_flag) {
      printf("# testinging the even/odd preconditioned Dirac operator\n");
    }
    else {
      printf("# testinging the standard Dirac operator\n");
    }
    fflush(stdout);
  }
  
  /* define the geometry */
  geometry();
  /* define the boundary conditions for the fermion fields */
  boundary(g_kappa);

#ifdef _USE_HALFSPINOR
  j = init_dirac_halfspinor();
  if ( j!= 0) {
    fprintf(stderr, "Not enough memory for halfspinor fields! Aborting...\n");
    exit(0);
  }
  if(g_sloppy_precision_flag == 1) {
    g_sloppy_precision = 1;
    j = init_dirac_halfspinor32();
    if ( j!= 0) {
      fprintf(stderr, "Not enough memory for 32-Bit halfspinor fields! Aborting...\n");
      exit(0);
    }
  }
#  if (defined _PERSISTENT)
  init_xchange_halffield();
#  endif
#endif  

  status = check_geometry();
  if (status != 0) {
    fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n");
    exit(1);
  }

#if (defined MPI && !(defined _USE_SHMEM))
  check_xchange(); 
#endif

  start_ranlux(1, 123456);

  xlfInfo = construct_paramsXlfInfo(0.5, 0);

  random_gauge_field(reproduce_randomnumber_flag);
  if ( startoption == 2 ) {  /* restart */ 
    write_gauge_field(gauge_input_filename,gauge_precision_write_flag,xlfInfo);
  } else if ( startoption == 0 ) { /* cold */
    unit_g_gauge_field();
  } else if (startoption == 3 ) { /* continue */
    read_gauge_field(gauge_input_filename);
  } else if ( startoption == 1 ) { /* hot */
  }


#ifdef MPI
  /*For parallelization: exchange the gaugefield */
  xchange_gauge();
#endif

#ifdef _GAUGE_COPY
  update_backward_gauge();
#endif

  if(even_odd_flag) {
    /*initialize the pseudo-fermion fields*/
    j_max=1;
    for (k = 0; k < k_max; k++) {
      random_spinor_field(g_spinor_field[k], VOLUME/2, 0);
    }

    if (read_source_flag == 2) { /* save */
      /* even first, odd second */
      write_spinorfield_cm_single(g_spinor_field[0],g_spinor_field[1],SourceInfo.basename);  
    }	else if (read_source_flag == 1) { /* yes */
      /* even first, odd second */
      read_spinorfield_cm_single(g_spinor_field[0],g_spinor_field[1],SourceInfo.basename,-1,0); 
# if (!defined MPI)
      if (write_cp_flag == 1) {
	strcat(SourceInfo.basename,".2");
	read_spinorfield_cm_single(g_spinor_field[2],g_spinor_field[3],SourceInfo.basename,-1,0); 

	nn=(int*)calloc(VOLUME,sizeof(int));
	if((void*)nn == NULL) return(100);
	mm=(int*)calloc(VOLUME,sizeof(int));
	if((void*)mm == NULL) return(100);

	n=0;
	deltamax=0.0;
	for(ix=0;ix<VOLUME/2;ix++){
	  (rsp.s0).c0.re = (g_spinor_field[2][ix].s0).c0.re - (g_spinor_field[0][ix].s0).c0.re;
	  (rsp.s0).c0.im = (g_spinor_field[2][ix].s0).c0.im - (g_spinor_field[0][ix].s0).c0.im;
	  (rsp.s0).c1.re = (g_spinor_field[2][ix].s0).c1.re - (g_spinor_field[0][ix].s0).c1.re;
	  (rsp.s0).c1.im = (g_spinor_field[2][ix].s0).c1.im - (g_spinor_field[0][ix].s0).c1.im;
	  (rsp.s0).c2.re = (g_spinor_field[2][ix].s0).c2.re - (g_spinor_field[0][ix].s0).c2.re;
	  (rsp.s0).c2.im = (g_spinor_field[2][ix].s0).c2.im - (g_spinor_field[0][ix].s0).c2.im;
	  (rsp.s1).c0.re = (g_spinor_field[2][ix].s1).c0.re - (g_spinor_field[0][ix].s1).c0.re;
	  (rsp.s1).c0.im = (g_spinor_field[2][ix].s1).c0.im - (g_spinor_field[0][ix].s1).c0.im;
	  (rsp.s1).c1.re = (g_spinor_field[2][ix].s1).c1.re - (g_spinor_field[0][ix].s1).c1.re;
	  (rsp.s1).c1.im = (g_spinor_field[2][ix].s1).c1.im - (g_spinor_field[0][ix].s1).c1.im;
	  (rsp.s1).c2.re = (g_spinor_field[2][ix].s1).c2.re - (g_spinor_field[0][ix].s1).c2.re;
	  (rsp.s1).c2.im = (g_spinor_field[2][ix].s1).c2.im - (g_spinor_field[0][ix].s1).c2.im;
	  (rsp.s2).c0.re = (g_spinor_field[2][ix].s2).c0.re - (g_spinor_field[0][ix].s2).c0.re;
	  (rsp.s2).c0.im = (g_spinor_field[2][ix].s2).c0.im - (g_spinor_field[0][ix].s2).c0.im;
	  (rsp.s2).c1.re = (g_spinor_field[2][ix].s2).c1.re - (g_spinor_field[0][ix].s2).c1.re;
	  (rsp.s2).c1.im = (g_spinor_field[2][ix].s2).c1.im - (g_spinor_field[0][ix].s2).c1.im;
	  (rsp.s2).c2.re = (g_spinor_field[2][ix].s2).c2.re - (g_spinor_field[0][ix].s2).c2.re;
	  (rsp.s2).c2.im = (g_spinor_field[2][ix].s2).c2.im - (g_spinor_field[0][ix].s2).c2.im;
	  (rsp.s3).c0.re = (g_spinor_field[2][ix].s3).c0.re - (g_spinor_field[0][ix].s3).c0.re;
	  (rsp.s3).c0.im = (g_spinor_field[2][ix].s3).c0.im - (g_spinor_field[0][ix].s3).c0.im;
	  (rsp.s3).c1.re = (g_spinor_field[2][ix].s3).c1.re - (g_spinor_field[0][ix].s3).c1.re;
	  (rsp.s3).c1.im = (g_spinor_field[2][ix].s3).c1.im - (g_spinor_field[0][ix].s3).c1.im;
	  (rsp.s3).c2.re = (g_spinor_field[2][ix].s3).c2.re - (g_spinor_field[0][ix].s3).c2.re;
	  (rsp.s3).c2.im = (g_spinor_field[2][ix].s3).c2.im - (g_spinor_field[0][ix].s3).c2.im;
	  _spinor_norm_sq(delta,rsp);
	  if (delta > 1.0e-12) {
	    nn[n] = g_eo2lexic[ix];
	    mm[n]=ix;
	    n++;	    
	  }
	  if(delta>deltamax) deltamax=delta;
	}
	if (n>0){
	  printf("mismatch in even spincolorfield in %d points:\n",n);
	  for(i=0; i< MIN(n,1000); i++){
	    printf("%d,(%d,%d,%d,%d):%f vs. %f\n",nn[i],g_coord[nn[i]][0],g_coord[nn[i]][1],g_coord[nn[i]][2],g_coord[nn[i]][3],(g_spinor_field[2][mm[i]].s0).c0.re, (g_spinor_field[0][mm[i]].s0).c0.re);fflush(stdout);
	  }
	}
	n = 0;
	for(ix=0;ix<VOLUME/2;ix++){
	  (rsp.s0).c0.re = (g_spinor_field[3][ix].s0).c0.re - (g_spinor_field[1][ix].s0).c0.re;
	  (rsp.s0).c0.im = (g_spinor_field[3][ix].s0).c0.im - (g_spinor_field[1][ix].s0).c0.im;
	  (rsp.s0).c1.re = (g_spinor_field[3][ix].s0).c1.re - (g_spinor_field[1][ix].s0).c1.re;
	  (rsp.s0).c1.im = (g_spinor_field[3][ix].s0).c1.im - (g_spinor_field[1][ix].s0).c1.im;
	  (rsp.s0).c2.re = (g_spinor_field[3][ix].s0).c2.re - (g_spinor_field[1][ix].s0).c2.re;
	  (rsp.s0).c2.im = (g_spinor_field[3][ix].s0).c2.im - (g_spinor_field[1][ix].s0).c2.im;
	  (rsp.s1).c0.re = (g_spinor_field[3][ix].s1).c0.re - (g_spinor_field[1][ix].s1).c0.re;
	  (rsp.s1).c0.im = (g_spinor_field[3][ix].s1).c0.im - (g_spinor_field[1][ix].s1).c0.im;
	  (rsp.s1).c1.re = (g_spinor_field[3][ix].s1).c1.re - (g_spinor_field[1][ix].s1).c1.re;
	  (rsp.s1).c1.im = (g_spinor_field[3][ix].s1).c1.im - (g_spinor_field[1][ix].s1).c1.im;
	  (rsp.s1).c2.re = (g_spinor_field[3][ix].s1).c2.re - (g_spinor_field[1][ix].s1).c2.re;
	  (rsp.s1).c2.im = (g_spinor_field[3][ix].s1).c2.im - (g_spinor_field[1][ix].s1).c2.im;
	  (rsp.s2).c0.re = (g_spinor_field[3][ix].s2).c0.re - (g_spinor_field[1][ix].s2).c0.re;
	  (rsp.s2).c0.im = (g_spinor_field[3][ix].s2).c0.im - (g_spinor_field[1][ix].s2).c0.im;
	  (rsp.s2).c1.re = (g_spinor_field[3][ix].s2).c1.re - (g_spinor_field[1][ix].s2).c1.re;
	  (rsp.s2).c1.im = (g_spinor_field[3][ix].s2).c1.im - (g_spinor_field[1][ix].s2).c1.im;
	  (rsp.s2).c2.re = (g_spinor_field[3][ix].s2).c2.re - (g_spinor_field[1][ix].s2).c2.re;
	  (rsp.s2).c2.im = (g_spinor_field[3][ix].s2).c2.im - (g_spinor_field[1][ix].s2).c2.im;
	  (rsp.s3).c0.re = (g_spinor_field[3][ix].s3).c0.re - (g_spinor_field[1][ix].s3).c0.re;
	  (rsp.s3).c0.im = (g_spinor_field[3][ix].s3).c0.im - (g_spinor_field[1][ix].s3).c0.im;
	  (rsp.s3).c1.re = (g_spinor_field[3][ix].s3).c1.re - (g_spinor_field[1][ix].s3).c1.re;
	  (rsp.s3).c1.im = (g_spinor_field[3][ix].s3).c1.im - (g_spinor_field[1][ix].s3).c1.im;
	  (rsp.s3).c2.re = (g_spinor_field[3][ix].s3).c2.re - (g_spinor_field[1][ix].s3).c2.re;
	  (rsp.s3).c2.im = (g_spinor_field[3][ix].s3).c2.im - (g_spinor_field[1][ix].s3).c2.im;
	  _spinor_norm_sq(delta,rsp);
	  if (delta > 1.0e-12) {
	    nn[n]=g_eo2lexic[ix+(VOLUME+RAND)/2];
	    mm[n]=ix;
	    n++;	    
	  }
	  if(delta>deltamax) deltamax=delta;
	}
	if (n>0){
	  printf("mismatch in odd spincolorfield in %d points:\n",n);
	  for(i=0; i< MIN(n,1000); i++){
	    printf("%d,(%d,%d,%d,%d):%f vs. %f\n",nn[i],g_coord[nn[i]][0],g_coord[nn[i]][1],g_coord[nn[i]][2],g_coord[nn[i]][3],(g_spinor_field[3][mm[i]].s0).c0.re, (g_spinor_field[1][mm[i]].s0).c0.re);fflush(stdout);
	  }
	}
	printf("max delta=%e",deltamax);fflush(stdout);
      }
# endif
    }
    
    if (read_source_flag > 0 && write_cp_flag == 0) { /* read-source yes or nobutsave; checkpoint no */
      /* first spinorial arg is output, the second is input */
      Hopping_Matrix(1, g_spinor_field[1], g_spinor_field[0]);      /*ieo=1 M_{eo}*/
      Hopping_Matrix(0, g_spinor_field[0], g_spinor_field[1]);      /*ieo=0 M_{oe}*/
      strcat(SourceInfo.basename,".out");
      write_spinorfield_cm_single(g_spinor_field[0],g_spinor_field[1],SourceInfo.basename);
      printf("Check-field printed. Exiting...\n");
      fflush(stdout);
    }

#ifdef MPI
    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();
#endif
  }

  free_gauge_field();
  free_geometry_indices();
  free_spinor_field();
  free_moment_field();
  return(0);
}
Пример #6
0
void init(int argc, char **argv) {


  kernelPackT = false; // Set true for kernel T face packing
  cuda_prec= prec;

  gauge_param = newQudaGaugeParam();
  inv_param = newQudaInvertParam();

  gauge_param.X[0] = xdim;
  gauge_param.X[1] = ydim;
  gauge_param.X[2] = zdim;
  gauge_param.X[3] = tdim;
  setDims(gauge_param.X);

  gauge_param.anisotropy = 1.0;

  gauge_param.type = QUDA_WILSON_LINKS;
  gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
  gauge_param.t_boundary = QUDA_PERIODIC_T;

  gauge_param.cpu_prec = cpu_prec;
  gauge_param.cuda_prec = cuda_prec;
  gauge_param.reconstruct = link_recon;
  gauge_param.reconstruct_sloppy = link_recon;
  gauge_param.cuda_prec_sloppy = cuda_prec;
  gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;

  inv_param.kappa = 0.1;

  if (dslash_type == QUDA_TWISTED_MASS_DSLASH) {
    inv_param.mu = 0.01;
    inv_param.twist_flavor = QUDA_TWIST_MINUS;
  }

  inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
  inv_param.dagger = dagger;

  inv_param.cpu_prec = cpu_prec;
  if (inv_param.cpu_prec != gauge_param.cpu_prec) 
    errorQuda("Gauge and spinor cpu precisions must match");

  inv_param.cuda_prec = cuda_prec;

#ifndef MULTI_GPU // free parameter for single GPU
  gauge_param.ga_pad = 0;
#else // must be this one c/b face for multi gpu
  int x_face_size = gauge_param.X[1]*gauge_param.X[2]*gauge_param.X[3]/2;
  int y_face_size = gauge_param.X[0]*gauge_param.X[2]*gauge_param.X[3]/2;
  int z_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[3]/2;
  int t_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[2]/2;
  int pad_size =MAX(x_face_size, y_face_size);
  pad_size = MAX(pad_size, z_face_size);
  pad_size = MAX(pad_size, t_face_size);
  gauge_param.ga_pad = pad_size;    
#endif
  inv_param.sp_pad = 0;
  inv_param.cl_pad = 0;

  //inv_param.sp_pad = 24*24*24;
  //inv_param.cl_pad = 24*24*24;

  inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; // test code only supports DeGrand-Rossi Basis
  inv_param.dirac_order = QUDA_DIRAC_ORDER;

  if (test_type == 2) {
    inv_param.solution_type = QUDA_MAT_SOLUTION;
  } else {
    inv_param.solution_type = QUDA_MATPC_SOLUTION;
  }

  inv_param.dslash_type = dslash_type;

  if (dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
    inv_param.clover_cpu_prec = cpu_prec;
    inv_param.clover_cuda_prec = cuda_prec;
    inv_param.clover_cuda_prec_sloppy = inv_param.clover_cuda_prec;
    inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
    //if (test_type > 0) {
      hostClover = malloc(V*cloverSiteSize*inv_param.clover_cpu_prec);
      hostCloverInv = hostClover; // fake it
      /*} else {
      hostClover = NULL;
      hostCloverInv = malloc(V*cloverSiteSize*inv_param.clover_cpu_prec);
      }*/
  } else if (dslash_type == QUDA_TWISTED_MASS_DSLASH) {

  }

  //inv_param.verbosity = QUDA_VERBOSE;

  // construct input fields
  for (int dir = 0; dir < 4; dir++) hostGauge[dir] = malloc(V*gaugeSiteSize*gauge_param.cpu_prec);

  ColorSpinorParam csParam;
  
  csParam.fieldLocation = QUDA_CPU_FIELD_LOCATION;
  csParam.nColor = 3;
  csParam.nSpin = 4;
  if (dslash_type == QUDA_TWISTED_MASS_DSLASH) {
    csParam.twistFlavor = inv_param.twist_flavor;
  }
  csParam.nDim = 4;
  for (int d=0; d<4; d++) csParam.x[d] = gauge_param.X[d];
  csParam.precision = inv_param.cpu_prec;
  csParam.pad = 0;
  if (test_type < 2) {
    csParam.siteSubset = QUDA_PARITY_SITE_SUBSET;
    csParam.x[0] /= 2;
  } else {
    csParam.siteSubset = QUDA_FULL_SITE_SUBSET;
  }    
  csParam.siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
  csParam.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;
  csParam.gammaBasis = inv_param.gamma_basis; 
  csParam.create = QUDA_ZERO_FIELD_CREATE;

  //csParam.verbose = QUDA_DEBUG_VERBOSE;

  spinor = new cpuColorSpinorField(csParam);
  spinorOut = new cpuColorSpinorField(csParam);
  spinorRef = new cpuColorSpinorField(csParam);

  csParam.siteSubset = QUDA_FULL_SITE_SUBSET;
  csParam.x[0] = gauge_param.X[0];
  
  printfQuda("Randomizing fields... ");

  if (strcmp(latfile,"")) {  // load in the command line supplied gauge field
    read_gauge_field(latfile, hostGauge, gauge_param.cpu_prec, gauge_param.X, argc, argv);
    construct_gauge_field(hostGauge, 2, gauge_param.cpu_prec, &gauge_param);
  } else { // else generate a random SU(3) field
    construct_gauge_field(hostGauge, 1, gauge_param.cpu_prec, &gauge_param);
  }

  spinor->Source(QUDA_RANDOM_SOURCE);

  if (dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
    double norm = 0.0; // clover components are random numbers in the range (-norm, norm)
    double diag = 1.0; // constant added to the diagonal

    if (test_type == 2) {
      construct_clover_field(hostClover, norm, diag, inv_param.clover_cpu_prec);
    } else {
      construct_clover_field(hostCloverInv, norm, diag, inv_param.clover_cpu_prec);
    }
  }
  printfQuda("done.\n"); fflush(stdout);
  
  initQuda(device);

  printfQuda("Sending gauge field to GPU\n");
  loadGaugeQuda(hostGauge, &gauge_param);

  if (dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
    printfQuda("Sending clover field to GPU\n");
    loadCloverQuda(hostClover, hostCloverInv, &inv_param);
    //clover = cudaCloverPrecise;
  }

  if (!transfer) {
    csParam.fieldLocation = QUDA_CUDA_FIELD_LOCATION;
    csParam.gammaBasis = QUDA_UKQCD_GAMMA_BASIS;
    csParam.pad = inv_param.sp_pad;
    csParam.precision = inv_param.cuda_prec;
    if (csParam.precision == QUDA_DOUBLE_PRECISION ) {
      csParam.fieldOrder = QUDA_FLOAT2_FIELD_ORDER;
    } else {
      /* Single and half */
      csParam.fieldOrder = QUDA_FLOAT4_FIELD_ORDER;
    }
 
    if (test_type < 2) {
      csParam.siteSubset = QUDA_PARITY_SITE_SUBSET;
      csParam.x[0] /= 2;
    }

    printfQuda("Creating cudaSpinor\n");
    cudaSpinor = new cudaColorSpinorField(csParam);
    printfQuda("Creating cudaSpinorOut\n");
    cudaSpinorOut = new cudaColorSpinorField(csParam);

    if (test_type == 2) csParam.x[0] /= 2;

    csParam.siteSubset = QUDA_PARITY_SITE_SUBSET;
    tmp1 = new cudaColorSpinorField(csParam);
    if (dslash_type == QUDA_CLOVER_WILSON_DSLASH ||
	dslash_type == QUDA_TWISTED_MASS_DSLASH) {
      tmp2 = new cudaColorSpinorField(csParam);
    }

    printfQuda("Sending spinor field to GPU\n");
    *cudaSpinor = *spinor;

    std::cout << "Source: CPU = " << norm2(*spinor) << ", CUDA = " << 
      norm2(*cudaSpinor) << std::endl;

    bool pc = (test_type != 2);
    DiracParam diracParam;
    setDiracParam(diracParam, &inv_param, pc);
    diracParam.verbose = QUDA_VERBOSE;
    diracParam.tmp1 = tmp1;
    diracParam.tmp2 = tmp2;
    
    dirac = Dirac::create(diracParam);
  } else {
    std::cout << "Source: CPU = " << norm2(*spinor) << std::endl;
  }
    
}
Пример #7
0
int main(int argc, char *argv[])
{
  FILE *parameterfile = NULL;
  int c, j, i, ix = 0, isample = 0, op_id = 0;
  char * filename = NULL;
  char datafilename[50];
  char parameterfilename[50];
  char conf_filename[50];
  char * input_filename = NULL;
  double plaquette_energy;
  struct stout_parameters params_smear;
  spinor **s, *s_;

#ifdef _KOJAK_INST
#pragma pomp inst init
#pragma pomp inst begin(main)
#endif
  

#if (defined SSE || defined SSE2 || SSE3)
  signal(SIGILL, &catch_ill_inst);
#endif

  DUM_DERI = 8;
  DUM_MATRIX = DUM_DERI + 5;
#if ((defined BGL && defined XLC) || defined _USE_TSPLITPAR)
  NO_OF_SPINORFIELDS = DUM_MATRIX + 3;
#else
  NO_OF_SPINORFIELDS = DUM_MATRIX + 3;
#endif

  verbose = 0;
  g_use_clover_flag = 0;

#ifdef MPI

#  ifdef OMP
  int mpi_thread_provided;
  MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_thread_provided);
#  else
  MPI_Init(&argc, &argv);
#  endif

  MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id);
#else
  g_proc_id = 0;
#endif

  while ((c = getopt(argc, argv, "h?vVf:o:")) != -1) {
    switch (c) {
      case 'f':
        input_filename = calloc(200, sizeof(char));
        strcpy(input_filename, optarg);
        break;
      case 'o':
        filename = calloc(200, sizeof(char));
        strcpy(filename, optarg);
        break;
      case 'v':
        verbose = 1;
        break;
      case 'V':
        fprintf(stdout,"%s %s\n",PACKAGE_STRING,git_hash);
        exit(0);
        break;
      case 'h':
      case '?':
      default:
        usage();
        break;
    }
  }
  if (input_filename == NULL) {
    input_filename = "invert.input";
  }
  if (filename == NULL) {
    filename = "output";
  }

  /* Read the input file */
  if( (j = read_input(input_filename)) != 0) {
    fprintf(stderr, "Could not find input file: %s\nAborting...\n", input_filename);
    exit(-1);
  }

#ifdef OMP
  if(omp_num_threads > 0) 
  {
     omp_set_num_threads(omp_num_threads);
  }
  else {
    if( g_proc_id == 0 )
      printf("# No value provided for OmpNumThreads, running in single-threaded mode!\n");

    omp_num_threads = 1;
    omp_set_num_threads(omp_num_threads);
  }

  init_omp_accumulators(omp_num_threads);
#endif

  /* this DBW2 stuff is not needed for the inversion ! */
  if (g_dflgcr_flag == 1) {
    even_odd_flag = 0;
  }
  g_rgi_C1 = 0;
  if (Nsave == 0) {
    Nsave = 1;
  }

  if (g_running_phmc) {
    NO_OF_SPINORFIELDS = DUM_MATRIX + 8;
  }

  tmlqcd_mpi_init(argc, argv);

  g_dbw2rand = 0;

  /* starts the single and double precision random number */
  /* generator                                            */
  start_ranlux(rlxd_level, random_seed);

  /* we need to make sure that we don't have even_odd_flag = 1 */
  /* if any of the operators doesn't use it                    */
  /* in this way even/odd can still be used by other operators */
  for(j = 0; j < no_operators; j++) if(!operator_list[j].even_odd_flag) even_odd_flag = 0;

#ifndef MPI
  g_dbw2rand = 0;
#endif

#ifdef _GAUGE_COPY
  j = init_gauge_field(VOLUMEPLUSRAND, 1);
#else
  j = init_gauge_field(VOLUMEPLUSRAND, 0);
#endif
  if (j != 0) {
    fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n");
    exit(-1);
  }
  j = init_geometry_indices(VOLUMEPLUSRAND);
  if (j != 0) {
    fprintf(stderr, "Not enough memory for geometry indices! Aborting...\n");
    exit(-1);
  }
  if (no_monomials > 0) {
    if (even_odd_flag) {
      j = init_monomials(VOLUMEPLUSRAND / 2, even_odd_flag);
    }
    else {
      j = init_monomials(VOLUMEPLUSRAND, even_odd_flag);
    }
    if (j != 0) {
      fprintf(stderr, "Not enough memory for monomial pseudo fermion fields! Aborting...\n");
      exit(-1);
    }
  }
  if (even_odd_flag) {
    j = init_spinor_field(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS);
  }
  else {
    j = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS);
  }
  if (j != 0) {
    fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n");
    exit(-1);
  }

  if (g_running_phmc) {
    j = init_chi_spinor_field(VOLUMEPLUSRAND / 2, 20);
    if (j != 0) {
      fprintf(stderr, "Not enough memory for PHMC Chi fields! Aborting...\n");
      exit(-1);
    }
  }

  g_mu = g_mu1;
  if (g_cart_id == 0) {
    /*construct the filenames for the observables and the parameters*/
    strcpy(datafilename, filename);
    strcat(datafilename, ".data");
    strcpy(parameterfilename, filename);
    strcat(parameterfilename, ".para");

    parameterfile = fopen(parameterfilename, "w");
    write_first_messages(parameterfile, 1);
    fclose(parameterfile);
  }

  /* define the geometry */
  geometry();

  /* define the boundary conditions for the fermion fields */
  boundary(g_kappa);

  phmc_invmaxev = 1.;

  init_operators();

  /* this could be maybe moved to init_operators */
#ifdef _USE_HALFSPINOR
  j = init_dirac_halfspinor();
  if (j != 0) {
    fprintf(stderr, "Not enough memory for halffield! Aborting...\n");
    exit(-1);
  }
  if (g_sloppy_precision_flag == 1) {
    j = init_dirac_halfspinor32();
    if (j != 0)
    {
      fprintf(stderr, "Not enough memory for 32-bit halffield! Aborting...\n");
      exit(-1);
    }
  }
#  if (defined _PERSISTENT)
  if (even_odd_flag)
    init_xchange_halffield();
#  endif
#endif

  for (j = 0; j < Nmeas; j++) {
    sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore);
    if (g_cart_id == 0) {
      printf("#\n# Trying to read gauge field from file %s in %s precision.\n",
            conf_filename, (gauge_precision_read_flag == 32 ? "single" : "double"));
      fflush(stdout);
    }
    if( (i = read_gauge_field(conf_filename)) !=0) {
      fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", i, conf_filename);
      exit(-2);
    }


    if (g_cart_id == 0) {
      printf("# Finished reading gauge field.\n");
      fflush(stdout);
    }
#ifdef MPI
    xchange_gauge(g_gauge_field);
#endif

    /*compute the energy of the gauge field*/
    plaquette_energy = measure_gauge_action( (const su3**) g_gauge_field);

    if (g_cart_id == 0) {
      printf("# The computed plaquette value is %e.\n", plaquette_energy / (6.*VOLUME*g_nproc));
      fflush(stdout);
    }

    if (use_stout_flag == 1){
      params_smear.rho = stout_rho;
      params_smear.iterations = stout_no_iter;
/*       if (stout_smear((su3_tuple*)(g_gauge_field[0]), &params_smear, (su3_tuple*)(g_gauge_field[0])) != 0) */
/*         exit(1) ; */
      g_update_gauge_copy = 1;
      g_update_gauge_energy = 1;
      g_update_rectangle_energy = 1;
      plaquette_energy = measure_gauge_action( (const su3**) g_gauge_field);

      if (g_cart_id == 0) {
        printf("# The plaquette value after stouting is %e\n", plaquette_energy / (6.*VOLUME*g_nproc));
        fflush(stdout);
      }
    }

    if (reweighting_flag == 1) {
      reweighting_factor(reweighting_samples, nstore);
    }

    /* Compute minimal eigenvalues, if wanted */
    if (compute_evs != 0) {
      eigenvalues(&no_eigenvalues, 5000, eigenvalue_precision,
                  0, compute_evs, nstore, even_odd_flag);
    }
    if (phmc_compute_evs != 0) {
#ifdef MPI
      MPI_Finalize();
#endif
      return(0);
    }

    /* Compute the mode number or topological susceptibility using spectral projectors, if wanted*/

    if(compute_modenumber != 0 || compute_topsus !=0){
      
      s_ = calloc(no_sources_z2*VOLUMEPLUSRAND+1, sizeof(spinor));
      s  = calloc(no_sources_z2, sizeof(spinor*));
      if(s_ == NULL) { 
	printf("Not enough memory in %s: %d",__FILE__,__LINE__); exit(42); 
      }
      if(s == NULL) { 
	printf("Not enough memory in %s: %d",__FILE__,__LINE__); exit(42); 
      }
      
      
      for(i = 0; i < no_sources_z2; i++) {
#if (defined SSE3 || defined SSE2 || defined SSE)
        s[i] = (spinor*)(((unsigned long int)(s_)+ALIGN_BASE)&~ALIGN_BASE)+i*VOLUMEPLUSRAND;
#else
        s[i] = s_+i*VOLUMEPLUSRAND;
#endif
	
        z2_random_spinor_field(s[i], VOLUME);
	
/* 	what is this here needed for?? */
/*         spinor *aux_,*aux; */
/* #if ( defined SSE || defined SSE2 || defined SSE3 ) */
/*         aux_=calloc(VOLUMEPLUSRAND+1, sizeof(spinor)); */
/*         aux = (spinor *)(((unsigned long int)(aux_)+ALIGN_BASE)&~ALIGN_BASE); */
/* #else */
/*         aux_=calloc(VOLUMEPLUSRAND, sizeof(spinor)); */
/*         aux = aux_; */
/* #endif */
	
        if(g_proc_id == 0) {
          printf("source %d \n", i);
        }
	
        if(compute_modenumber != 0){
          mode_number(s[i], mstarsq);
        }
	
        if(compute_topsus !=0) {
          top_sus(s[i], mstarsq);
        }
      }
      free(s);
      free(s_);
    }


    /* move to operators as well */
    if (g_dflgcr_flag == 1) {
      /* set up deflation blocks */
      init_blocks(nblocks_t, nblocks_x, nblocks_y, nblocks_z);

      /* the can stay here for now, but later we probably need */
      /* something like init_dfl_solver called somewhere else  */
      /* create set of approximate lowest eigenvectors ("global deflation subspace") */

      /*       g_mu = 0.; */
      /*       boundary(0.125); */
      generate_dfl_subspace(g_N_s, VOLUME);
      /*       boundary(g_kappa); */
      /*       g_mu = g_mu1; */

      /* Compute little Dirac operators */
      /*       alt_block_compute_little_D(); */
      if (g_debug_level > 0) {
        check_projectors();
        check_local_D();
      }
      if (g_debug_level > 1) {
        check_little_D_inversion();
      }

    }
    if(SourceInfo.type == 1) {
      index_start = 0;
      index_end = 1;
    }

    g_precWS=NULL;
    if(use_preconditioning == 1){
      /* todo load fftw wisdom */
#if (defined HAVE_FFTW ) && !( defined MPI)
      loadFFTWWisdom(g_spinor_field[0],g_spinor_field[1],T,LX);
#else
      use_preconditioning=0;
#endif
    }

    if (g_cart_id == 0) {
      fprintf(stdout, "#\n"); /*Indicate starting of the operator part*/
    }
    for(op_id = 0; op_id < no_operators; op_id++) {
      boundary(operator_list[op_id].kappa);
      g_kappa = operator_list[op_id].kappa; 
      g_mu = 0.;

      if(use_preconditioning==1 && PRECWSOPERATORSELECT[operator_list[op_id].solver]!=PRECWS_NO ){
        printf("# Using preconditioning with treelevel preconditioning operator: %s \n",
              precWSOpToString(PRECWSOPERATORSELECT[operator_list[op_id].solver]));
        /* initial preconditioning workspace */
        operator_list[op_id].precWS=(spinorPrecWS*)malloc(sizeof(spinorPrecWS));
        spinorPrecWS_Init(operator_list[op_id].precWS,
                  operator_list[op_id].kappa,
                  operator_list[op_id].mu/2./operator_list[op_id].kappa,
                  -(0.5/operator_list[op_id].kappa-4.),
                  PRECWSOPERATORSELECT[operator_list[op_id].solver]);
        g_precWS = operator_list[op_id].precWS;

        if(PRECWSOPERATORSELECT[operator_list[op_id].solver] == PRECWS_D_DAGGER_D) {
          fitPrecParams(op_id);
        }
      }

      for(isample = 0; isample < no_samples; isample++) {
        for (ix = index_start; ix < index_end; ix++) {
          if (g_cart_id == 0) {
            fprintf(stdout, "#\n"); /*Indicate starting of new index*/
          }
          /* we use g_spinor_field[0-7] for sources and props for the moment */
          /* 0-3 in case of 1 flavour  */
          /* 0-7 in case of 2 flavours */
          prepare_source(nstore, isample, ix, op_id, read_source_flag, source_location);
          operator_list[op_id].inverter(op_id, index_start);
        }
      }


      if(use_preconditioning==1 && operator_list[op_id].precWS!=NULL ){
        /* free preconditioning workspace */
        spinorPrecWS_Free(operator_list[op_id].precWS);
        free(operator_list[op_id].precWS);
      }

      if(operator_list[op_id].type == OVERLAP){
        free_Dov_WS();
      }

    }
    nstore += Nsave;
  }

#ifdef MPI
  MPI_Finalize();
#endif
#ifdef OMP
  free_omp_accumulators();
#endif
  free_blocks();
  free_dfl_subspace();
  free_gauge_field();
  free_geometry_indices();
  free_spinor_field();
  free_moment_field();
  free_chi_spinor_field();
  return(0);
#ifdef _KOJAK_INST
#pragma pomp inst end(main)
#endif
}
Пример #8
0
int main(int argc,char *argv[])
{
  FILE *parameterfile = NULL;
  char datafilename[206];
  char parameterfilename[206];
  char conf_filename[50];
  char scalar_filename[50];
  char * input_filename = NULL;
  char * filename = NULL;
  double plaquette_energy;

#ifdef _USE_HALFSPINOR
	#undef _USE_HALFSPINOR
	printf("# WARNING: USE_HALFSPINOR will be ignored (not supported here).\n");
#endif

	if(even_odd_flag)
	{
		even_odd_flag=0;
		printf("# WARNING: even_odd_flag will be ignored (not supported here).\n");
	}
	int j,j_max,k,k_max = 2;
	_Complex double * drvsc;

#ifdef HAVE_LIBLEMON
	paramsXlfInfo *xlfInfo;
#endif
	int status = 0;

	static double t1,t2,dt,sdt,dts,qdt,sqdt;
	double antioptaway=0.0;

#ifdef MPI
	static double dt2;

	DUM_DERI = 6;
	DUM_SOLVER = DUM_DERI+2;
	DUM_MATRIX = DUM_SOLVER+6;
	NO_OF_SPINORFIELDS = DUM_MATRIX+2;

#ifdef OMP
	int mpi_thread_provided;
	MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_thread_provided);
#else
	MPI_Init(&argc, &argv);
#endif
	MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id);

#else
	g_proc_id = 0;
#endif

	g_rgi_C1 = 1.;

  process_args(argc,argv,&input_filename,&filename);
  set_default_filenames(&input_filename, &filename);

  /* Read the input file */
  if( (j = read_input(input_filename)) != 0) {
    fprintf(stderr, "Could not find input file: %s\nAborting...\n", input_filename);
    exit(-1);
  }

	if(g_proc_id==0) {
		printf("parameter rho_BSM set to %f\n", rho_BSM);
		printf("parameter eta_BSM set to %f\n", eta_BSM);
		printf("parameter  m0_BSM set to %f\n",  m0_BSM);
	}

#ifdef OMP
	init_openmp();
#endif

	tmlqcd_mpi_init(argc, argv);


	if(g_proc_id==0) {
#ifdef SSE
		printf("# The code was compiled with SSE instructions\n");
#endif
#ifdef SSE2
		printf("# The code was compiled with SSE2 instructions\n");
#endif
#ifdef SSE3
		printf("# The code was compiled with SSE3 instructions\n");
#endif
#ifdef P4
		printf("# The code was compiled for Pentium4\n");
#endif
#ifdef OPTERON
		printf("# The code was compiled for AMD Opteron\n");
#endif
#ifdef _GAUGE_COPY
		printf("# The code was compiled with -D_GAUGE_COPY\n");
#endif
#ifdef BGL
		printf("# The code was compiled for Blue Gene/L\n");
#endif
#ifdef BGP
		printf("# The code was compiled for Blue Gene/P\n");
#endif
#ifdef _USE_HALFSPINOR
		printf("# The code was compiled with -D_USE_HALFSPINOR\n");
#endif
#ifdef _USE_SHMEM
		printf("# The code was compiled with -D_USE_SHMEM\n");
#ifdef _PERSISTENT
		printf("# The code was compiled for persistent MPI calls (halfspinor only)\n");
#endif
#endif
#ifdef MPI
	#ifdef _NON_BLOCKING
		printf("# The code was compiled for non-blocking MPI calls (spinor and gauge)\n");
	#endif
#endif
		printf("\n");
		fflush(stdout);
	}


#ifdef _GAUGE_COPY
	init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1);
#else
	init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0);
#endif
	init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand);


	j = init_bispinor_field(VOLUMEPLUSRAND, 12);
	if ( j!= 0) {
		fprintf(stderr, "Not enough memory for bispinor fields! Aborting...\n");
		exit(0);
	}

	j = init_spinor_field(VOLUMEPLUSRAND, 12);
	if ( j!= 0) {
		fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n");
		exit(0);
	}

	int numbScalarFields = 4;
	j = init_scalar_field(VOLUMEPLUSRAND, numbScalarFields);
	if ( j!= 0) {
		fprintf(stderr, "Not enough memory for scalar fields! Aborting...\n");
		exit(0);
	}

	drvsc = malloc(18*VOLUMEPLUSRAND*sizeof(_Complex double));

	if(g_proc_id == 0) {
		fprintf(stdout,"# The number of processes is %d \n",g_nproc);
		printf("# The lattice size is %d x %d x %d x %d\n",
		 (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(g_nproc_z*LZ));
		printf("# The local lattice size is %d x %d x %d x %d\n",
		 (int)(T), (int)(LX), (int)(LY),(int) LZ);

		fflush(stdout);
	}

	/* define the geometry */
	geometry();

  j = init_bsm_2hop_lookup(VOLUME);
	if ( j!= 0) {
    // this should not be reached since the init function calls fatal_error anyway
		fprintf(stderr, "Not enough memory for BSM2b 2hop lookup table! Aborting...\n");
		exit(0);
	}

	/* define the boundary conditions for the fermion fields */
  /* for the actual inversion, this is done in invert.c as the operators are iterated through */
  // 
  // For the BSM operator we don't use kappa normalisation,
  // as a result, when twisted boundary conditions are applied this needs to be unity.
  // In addition, unlike in the Wilson case, the hopping term comes with a plus sign.
  // However, in boundary(), the minus sign for the Wilson case is implicitly included.
  // We therefore use -1.0 here.
	boundary(-1.0);

	status = check_geometry();
	if (status != 0) {
		fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n");
		exit(1);
	}
#if (defined MPI && !(defined _USE_SHMEM))
	// fails, we're not using spinor fields
//	check_xchange();
#endif

	start_ranlux(1, 123456);

	// read gauge field
	if( strcmp(gauge_input_filename, "create_random_gaugefield") == 0 ) {
		random_gauge_field(reproduce_randomnumber_flag, g_gauge_field);
	}
	else {
		sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore);
		if (g_cart_id == 0) {
		  printf("#\n# Trying to read gauge field from file %s in %s precision.\n",
				conf_filename, (gauge_precision_read_flag == 32 ? "single" : "double"));
		  fflush(stdout);
		}

		int i;
		if( (i = read_gauge_field(conf_filename,g_gauge_field)) !=0) {
		  fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", i, conf_filename);
		  exit(-2);
		}

		if (g_cart_id == 0) {
			printf("# Finished reading gauge field.\n");
			fflush(stdout);
		}
	}

	// read scalar field
	if( strcmp(scalar_input_filename, "create_random_scalarfield") == 0 ) {
		for( int s=0; s<numbScalarFields; s++ )
			ranlxd(g_scalar_field[s], VOLUME);
	}
	else {
		sprintf(scalar_filename, "%s.%d", scalar_input_filename, nscalar);
		if (g_cart_id == 0) {
		  printf("#\n# Trying to read scalar field from file %s in %s precision.\n",
				scalar_filename, (scalar_precision_read_flag == 32 ? "single" : "double"));
		  fflush(stdout);
		}

		int i;
		if( (i = read_scalar_field(scalar_filename,g_scalar_field)) !=0) {
		  fprintf(stderr, "Error %d while reading scalar field from %s\n Aborting...\n", i, scalar_filename);
		  exit(-2);
		}

		if (g_cart_id == 0) {
			printf("# Finished reading scalar field.\n");
			fflush(stdout);
		}
	}

#ifdef MPI
    xchange_gauge(g_gauge_field);
#endif

    /*compute the energy of the gauge field*/
    plaquette_energy = measure_plaquette( (const su3**) g_gauge_field);

    if (g_cart_id == 0) {
      printf("# The computed plaquette value is %e.\n", plaquette_energy / (6.*VOLUME*g_nproc));
      fflush(stdout);
    }

#ifdef MPI
	for( int s=0; s<numbScalarFields; s++ )
		generic_exchange(g_scalar_field[s], sizeof(scalar));
#endif

	/*initialize the bispinor fields*/
	j_max=1;
	sdt=0.;
  // w
	random_spinor_field_lexic( (spinor*)(g_bispinor_field[4]), reproduce_randomnumber_flag, RN_GAUSS);
	random_spinor_field_lexic( (spinor*)(g_bispinor_field[4])+VOLUME, reproduce_randomnumber_flag, RN_GAUSS);
	// for the D^\dagger test:
  // v
	random_spinor_field_lexic( (spinor*)(g_bispinor_field[5]), reproduce_randomnumber_flag, RN_GAUSS);
	random_spinor_field_lexic( (spinor*)(g_bispinor_field[5])+VOLUME, reproduce_randomnumber_flag, RN_GAUSS);
#if defined MPI
	generic_exchange(g_bispinor_field[4], sizeof(bispinor));
#endif

	// print L2-norm of source:
	double squarenorm = square_norm((spinor*)g_bispinor_field[4], 2*VOLUME, 1);
	if(g_proc_id==0) {
		printf("\n# square norm of the source: ||w||^2 = %e\n\n", squarenorm);
		fflush(stdout);
	}

  double t_MG, t_BK;
	/* inversion needs to be done first because it uses loads of the g_bispinor_fields internally */
#if TEST_INVERSION
  if(g_proc_id==1)
    printf("Testing inversion\n");
  // Bartek's operator
  t1 = gettime();
	cg_her_bi(g_bispinor_field[9], g_bispinor_field[4],
           25000, 1.0e-14, 0, VOLUME, &Q2_psi_BSM2b);
  t_BK = gettime() - t1;

  // Marco's operator
  t1 = gettime();
	cg_her_bi(g_bispinor_field[8], g_bispinor_field[4],
           25000, 1.0e-14, 0, VOLUME, &Q2_psi_BSM2m);
  t_MG = gettime() - t1;
  
  if(g_proc_id==0)
    printf("Operator inversion time: t_MG = %f sec \t t_BK = %f sec\n\n", t_MG, t_BK); 
#endif

  /* now apply the operators to the same bispinor field and do various comparisons */

  // Marco's operator
#ifdef MPI
  MPI_Barrier(MPI_COMM_WORLD);
#endif
  t_MG = 0.0;
  t1 = gettime();
  D_psi_BSM2m(g_bispinor_field[0], g_bispinor_field[4]);
  t1 = gettime() - t1;
#ifdef MPI
	MPI_Allreduce (&t1, &t_MG, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
  t_MG = t1;
#endif

  // Bartek's operator
#ifdef MPI
  MPI_Barrier(MPI_COMM_WORLD);
#endif
  t_BK = 0.0;
  t1 = gettime();
  D_psi_BSM2b(g_bispinor_field[1], g_bispinor_field[4]);
  t1 = gettime() - t1;
#ifdef MPI
	MPI_Allreduce (&t1, &t_BK, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
  t_BK = t1;
#endif
  
  if(g_proc_id==0)
    printf("Operator application time: t_MG = %f sec \t t_BK = %f sec\n\n", t_MG, t_BK); 

	squarenorm = square_norm((spinor*)g_bispinor_field[0], 2*VOLUME, 1);
	if(g_proc_id==0) {
		printf("# || D_MG w ||^2 = %.16e\n", squarenorm);
		fflush(stdout);
	}
	squarenorm = square_norm((spinor*)g_bispinor_field[1], 2*VOLUME, 1);
	if(g_proc_id==0) {
		printf("# || D_BK w ||^2 = %.16e\n\n\n", squarenorm);
		fflush(stdout);
	}

  diff( (spinor*)g_bispinor_field[3], (spinor*)g_bispinor_field[0], (spinor*)g_bispinor_field[1], 2*VOLUME);

  printf("element-wise difference between (D_BK w) and (D_MG w)\n");
  printf("( D_MG w - M_BK w )->sp_up.s0.c0= %.16e + I*(%.16e)\n\n", creal(g_bispinor_field[3][0].sp_up.s0.c0), cimag(g_bispinor_field[3][0].sp_up.s0.c0) );

  double diffnorm = square_norm( (spinor*) g_bispinor_field[3], 2*VOLUME, 1 );
  if(g_proc_id==0){
    printf("Square norm of the difference\n");
    printf("|| D_MG w - D_BK w ||^2     = %.16e \n\n\n", diffnorm); 
  }

	// < D w, v >
  printf("Check consistency of D and D^dagger\n");
  _Complex double prod1_MG = scalar_prod( (spinor*)g_bispinor_field[0], (spinor*)g_bispinor_field[5], 2*VOLUME, 1 );
	if(g_proc_id==0)
    printf("< D_MG w, v >        = %.16e + I*(%.16e)\n", creal(prod1_MG), cimag(prod1_MG));
	
  _Complex double prod1_BK = scalar_prod( (spinor*)g_bispinor_field[1], (spinor*)g_bispinor_field[5], 2*VOLUME, 1 );
  if(g_proc_id==0)
  	printf("< D_BK w, v >        = %.16e + I*(%.16e)\n\n", creal(prod1_BK), cimag(prod1_BK));
	
  // < w, D^\dagger v >
  t_MG = gettime();
	D_psi_dagger_BSM2m(g_bispinor_field[6], g_bispinor_field[5]);
  t_MG = gettime()-t_MG;

  t_BK = gettime();
	D_psi_dagger_BSM2b(g_bispinor_field[7], g_bispinor_field[5]);
  t_BK = gettime() - t_BK;

  if(g_proc_id==0)
    printf("Operator dagger application time: t_MG = %f sec \t t_BK = %f sec\n\n", t_MG, t_BK); 

	_Complex double prod2_MG = scalar_prod((spinor*)g_bispinor_field[4], (spinor*)g_bispinor_field[6], 2*VOLUME, 1);
	_Complex double prod2_BK = scalar_prod((spinor*)g_bispinor_field[4], (spinor*)g_bispinor_field[7], 2*VOLUME, 1);
  if( g_proc_id == 0 ){
	  printf("< w, D_MG^dagger v > = %.16e + I*(%.16e)\n", creal(prod2_MG), cimag(prod2_MG));
    printf("< w, D_BK^dagger v > = %.16e + I*(%.16e)\n", creal(prod2_BK), cimag(prod2_BK));
	  
    printf("\n| < D_MG w, v > - < w, D_MG^dagger v > | = %.16e\n",cabs(prod2_MG-prod1_MG));
	  printf("| < D_BK w, v > - < w, D_BK^dagger v > | = %.16e\n\n",cabs(prod2_BK-prod1_BK));
  }
	
#if TEST_INVERSION
	// check result of inversion
	Q2_psi_BSM2m(g_bispinor_field[10], g_bispinor_field[8]);
	Q2_psi_BSM2b(g_bispinor_field[11], g_bispinor_field[8]);
	assign_diff_mul((spinor*)g_bispinor_field[10], (spinor*)g_bispinor_field[4], 1.0, 2*VOLUME);
	assign_diff_mul((spinor*)g_bispinor_field[11], (spinor*)g_bispinor_field[4], 1.0, 2*VOLUME);
	double squarenorm_MGMG = square_norm((spinor*)g_bispinor_field[10], 2*VOLUME, 1);
	double squarenorm_BKMG = square_norm((spinor*)g_bispinor_field[11], 2*VOLUME, 1);
	if(g_proc_id==0) {
		printf("# ||Q2_MG*(Q2_MG)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_MGMG);
		printf("# ||Q2_BK*(Q2_MG)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_BKMG);
		fflush(stdout);
	}
	
  Q2_psi_BSM2b(g_bispinor_field[10], g_bispinor_field[9]);
  Q2_psi_BSM2m(g_bispinor_field[11], g_bispinor_field[9]);
	assign_diff_mul((spinor*)g_bispinor_field[10], (spinor*)g_bispinor_field[4], 1.0, 2*VOLUME);
	assign_diff_mul((spinor*)g_bispinor_field[11], (spinor*)g_bispinor_field[4], 1.0, 2*VOLUME);
	double squarenorm_BKBK = square_norm((spinor*)g_bispinor_field[10], 2*VOLUME, 1);
	double squarenorm_MGBK = square_norm((spinor*)g_bispinor_field[11], 2*VOLUME, 1);
	if(g_proc_id==0) {
		printf("# ||Q2_BK*(Q2_BK)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_BKBK);
		printf("# ||Q2_MG*(Q2_BK)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_MGBK);
		fflush(stdout);
	}
#endif

#ifdef OMP
	free_omp_accumulators();
#endif
	free_gauge_field();
	free_geometry_indices();
	free_bispinor_field();
	free_scalar_field();
#ifdef MPI
	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Finalize();
#endif
	return(0);
}
Пример #9
0
int main(int argc,char *argv[]) {

  FILE *parameterfile=NULL, *countfile=NULL;
  char *filename = NULL;
  char datafilename[50];
  char parameterfilename[50];
  char gauge_filename[50];
  char nstore_filename[50];
  char tmp_filename[50];
  char *input_filename = NULL;
  int status = 0, accept = 0;
  int j,ix,mu, trajectory_counter=1;
  struct timeval t1;

  /* Energy corresponding to the Gauge part */
  double plaquette_energy = 0., rectangle_energy = 0.;
  /* Acceptance rate */
  int Rate=0;
  /* Do we want to perform reversibility checks */
  /* See also return_check_flag in read_input.h */
  int return_check = 0;
  /* For getopt */
  int c;

  paramsXlfInfo *xlfInfo;

/* For online measurements */
  measurement * meas;
  int imeas;
  
#ifdef _KOJAK_INST
#pragma pomp inst init
#pragma pomp inst begin(main)
#endif

#if (defined SSE || defined SSE2 || SSE3)
  signal(SIGILL,&catch_ill_inst);
#endif

  strcpy(gauge_filename,"conf.save");
  strcpy(nstore_filename,".nstore_counter");
  strcpy(tmp_filename, ".conf.tmp");

  verbose = 1;
  g_use_clover_flag = 0;

#ifdef MPI

#  ifdef OMP
  int mpi_thread_provided;
  MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_thread_provided);
#  else
  MPI_Init(&argc, &argv);
#  endif

  MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id);
#else
  g_proc_id = 0;
#endif


  while ((c = getopt(argc, argv, "h?vVf:o:")) != -1) {
    switch (c) {
    case 'f':
      input_filename = calloc(200, sizeof(char));
      strcpy(input_filename,optarg);
      break;
    case 'o':
      filename = calloc(200, sizeof(char));
      strcpy(filename,optarg);
      break;
    case 'v':
      verbose = 1;
      break;
    case 'V':
      fprintf(stdout,"%s %s\n",PACKAGE_STRING,git_hash);
      exit(0);
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }
  if(input_filename == NULL){
    input_filename = "hmc.input";
  }
  if(filename == NULL){
    filename = "output";
  }

  /* Read the input file */
  if( (status = read_input(input_filename)) != 0) {
    fprintf(stderr, "Could not find input file: %s\nAborting...\n", input_filename);
    exit(-1);
  }

  /* set number of omp threads to be used */
#ifdef OMP
  if(omp_num_threads > 0) 
  {
     omp_set_num_threads(omp_num_threads);
  }
  else {
    if( g_proc_id == 0 )
      printf("# No value provided for OmpNumThreads, running in single-threaded mode!\n");

    omp_num_threads = 1;
    omp_set_num_threads(omp_num_threads);
  }

  init_omp_accumulators(omp_num_threads);
#endif

  DUM_DERI = 4;
  DUM_SOLVER = DUM_DERI+1;
  DUM_MATRIX = DUM_SOLVER+6;
  if(g_running_phmc) {
    NO_OF_SPINORFIELDS = DUM_MATRIX+8;
  }
  else {
    NO_OF_SPINORFIELDS = DUM_MATRIX+6;
  }
  DUM_BI_DERI = 6;
  DUM_BI_SOLVER = DUM_BI_DERI+7;

  DUM_BI_MATRIX = DUM_BI_SOLVER+6;
  NO_OF_BISPINORFIELDS = DUM_BI_MATRIX+6;

  tmlqcd_mpi_init(argc, argv);

  if(nstore == -1) {
    countfile = fopen(nstore_filename, "r");
    if(countfile != NULL) {
      j = fscanf(countfile, "%d %d %s\n", &nstore, &trajectory_counter, gauge_input_filename);
      if(j < 1) nstore = 0;
      if(j < 2) trajectory_counter = 0;
      fclose(countfile);
    }
    else {
      nstore = 0;
      trajectory_counter = 0;
    }
  }
  
#ifndef MPI
  g_dbw2rand = 0;
#endif
  
  
  g_mu = g_mu1;
  
#ifdef _GAUGE_COPY
  status = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1);
#else
  status = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0);
#endif
  if (status != 0) {
    fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n");
    exit(0);
  }
  j = init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand);
  if (j != 0) {
    fprintf(stderr, "Not enough memory for geometry_indices! Aborting...\n");
    exit(0);
  }
  if(even_odd_flag) {
    j = init_spinor_field(VOLUMEPLUSRAND/2, NO_OF_SPINORFIELDS);
  }
  else {
    j = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS);
  }
  if (j != 0) {
    fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n");
    exit(0);
  }
  if(even_odd_flag) {
    j = init_csg_field(VOLUMEPLUSRAND/2);
  }
  else {
    j = init_csg_field(VOLUMEPLUSRAND);
  }
  if (j != 0) {
    fprintf(stderr, "Not enough memory for csg fields! Aborting...\n");
    exit(0);
  }
  j = init_moment_field(VOLUME, VOLUMEPLUSRAND + g_dbw2rand);
  if (j != 0) {
    fprintf(stderr, "Not enough memory for moment fields! Aborting...\n");
    exit(0);
  }

  if(g_running_phmc) {
    j = init_bispinor_field(VOLUME/2, NO_OF_BISPINORFIELDS);
    if (j!= 0) {
      fprintf(stderr, "Not enough memory for bi-spinor fields! Aborting...\n");
      exit(0);
    }
  }

  /* list and initialize measurements*/
  if(g_proc_id == 0) {
    printf("\n");
    for(j = 0; j < no_measurements; j++) {
      printf("# measurement id %d, type = %d: Frequency %d\n", j, measurement_list[j].type, measurement_list[j].freq);
    }
  }
  init_measurements();

  /*construct the filenames for the observables and the parameters*/
  strcpy(datafilename,filename);  
  strcat(datafilename,".data");
  strcpy(parameterfilename,filename);  
  strcat(parameterfilename,".para");

  if(g_proc_id == 0){
    parameterfile = fopen(parameterfilename, "a");
    write_first_messages(parameterfile, "hmc", git_hash);
  }

  /* define the geometry */
  geometry();

  /* define the boundary conditions for the fermion fields */
  boundary(g_kappa);

  status = check_geometry();

  if (status != 0) {
    fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n");
    exit(1);
  }


#ifdef _USE_HALFSPINOR
  j = init_dirac_halfspinor();
  if (j!= 0) {
    fprintf(stderr, "Not enough memory for halffield! Aborting...\n");
    exit(-1);
  }
  if(g_sloppy_precision_flag == 1) {
    init_dirac_halfspinor32();
  }
#  if (defined _PERSISTENT)
  init_xchange_halffield();
#  endif
#endif

  /* Initialise random number generator */
  start_ranlux(rlxd_level, random_seed^nstore );

  /* Set up the gauge field */
  /* continue and restart */
  if(startoption==3 || startoption == 2) {
    if(g_proc_id == 0) {
      printf("# Trying to read gauge field from file %s in %s precision.\n",
            gauge_input_filename, (gauge_precision_read_flag == 32 ? "single" : "double"));
      fflush(stdout);
    }
    if( (status = read_gauge_field(gauge_input_filename)) != 0) {
      fprintf(stderr, "Error %d while reading gauge field from %s\nAborting...\n", status, gauge_input_filename);
      exit(-2);
    }

    if (g_proc_id == 0){
      printf("# Finished reading gauge field.\n");
      fflush(stdout);
    }
  }
  else if (startoption == 1) {
    /* hot */
    random_gauge_field(reproduce_randomnumber_flag, g_gauge_field);
  }
  else if(startoption == 0) {
    /* cold */
    unit_g_gauge_field();
  }

  /*For parallelization: exchange the gaugefield */
#ifdef MPI
  xchange_gauge(g_gauge_field);
#endif

  if(even_odd_flag) {
    j = init_monomials(VOLUMEPLUSRAND/2, even_odd_flag);
  }
  else {
    j = init_monomials(VOLUMEPLUSRAND, even_odd_flag);
  }
  if (j != 0) {
    fprintf(stderr, "Not enough memory for monomial pseudo fermion fields! Aborting...\n");
    exit(0);
  }

  init_integrator();

  if(g_proc_id == 0) {
    for(j = 0; j < no_monomials; j++) {
      printf("# monomial id %d type = %d timescale %d\n", j, monomial_list[j].type, monomial_list[j].timescale);
    }
  }

  plaquette_energy = measure_gauge_action( (const su3**) g_gauge_field);
  if(g_rgi_C1 > 0. || g_rgi_C1 < 0.) {
    rectangle_energy = measure_rectangles( (const su3**) g_gauge_field);
    if(g_proc_id == 0){
      fprintf(parameterfile,"# Computed rectangle value: %14.12f.\n",rectangle_energy/(12.*VOLUME*g_nproc));
    }
  }
  //eneg = g_rgi_C0 * plaquette_energy + g_rgi_C1 * rectangle_energy;

  if(g_proc_id == 0) {
    fprintf(parameterfile,"# Computed plaquette value: %14.12f.\n", plaquette_energy/(6.*VOLUME*g_nproc));
    printf("# Computed plaquette value: %14.12f.\n", plaquette_energy/(6.*VOLUME*g_nproc));
    fclose(parameterfile);
  }

  /* set ddummy to zero */
  for(ix = 0; ix < VOLUMEPLUSRAND; ix++){
    for(mu=0; mu<4; mu++){
      ddummy[ix][mu].d1=0.;
      ddummy[ix][mu].d2=0.;
      ddummy[ix][mu].d3=0.;
      ddummy[ix][mu].d4=0.;
      ddummy[ix][mu].d5=0.;
      ddummy[ix][mu].d6=0.;
      ddummy[ix][mu].d7=0.;
      ddummy[ix][mu].d8=0.;
    }
  }

  if(g_proc_id == 0) {
    gettimeofday(&t1,NULL);
    countfile = fopen("history_hmc_tm", "a");
    fprintf(countfile, "!!! Timestamp %ld, Nsave = %d, g_mu = %e, g_mu1 = %e, g_mu_2 = %e, g_mu3 = %e, beta = %f, kappa = %f, C1 = %f, ",
            t1.tv_sec, Nsave, g_mu, g_mu1, g_mu2, g_mu3, g_beta, g_kappa, g_rgi_C1);
    for(j = 0; j < Integrator.no_timescales; j++) {
      fprintf(countfile, "n_int[%d] = %d ", j, Integrator.no_mnls_per_ts[j]);
    }
    fprintf(countfile, "\n");
    fclose(countfile);
  }


  /* Loop for measurements */
  for(j = 0; j < Nmeas; j++) {
    if(g_proc_id == 0) {
      printf("#\n# Starting trajectory no %d\n", trajectory_counter);
    }

    return_check = return_check_flag && (trajectory_counter%return_check_interval == 0);

    accept = update_tm(&plaquette_energy, &rectangle_energy, datafilename, 
		       return_check, Ntherm<trajectory_counter, trajectory_counter);
    Rate += accept;

    /* Save gauge configuration all Nsave times */
    if((Nsave !=0) && (trajectory_counter%Nsave == 0) && (trajectory_counter!=0)) {
      sprintf(gauge_filename,"conf.%.4d", nstore);
      if(g_proc_id == 0) {
        countfile = fopen("history_hmc_tm", "a");
        fprintf(countfile, "%.4d, measurement %d of %d, Nsave = %d, Plaquette = %e, trajectory nr = %d\n",
            nstore, j, Nmeas, Nsave, plaquette_energy/(6.*VOLUME*g_nproc),
            trajectory_counter);
        fclose(countfile);
      }
      nstore ++;
    }
    else {
      sprintf(gauge_filename,"conf.save");
    }
    if(((Nsave !=0) && (trajectory_counter%Nsave == 0) && (trajectory_counter!=0)) || (write_cp_flag == 1) || (j >= (Nmeas - 1))) {
      /* If a reversibility check was performed this trajectory, and the trajectory was accepted,
       * then the configuration is currently stored in .conf.tmp, written out by update_tm.
       * In that case also a readback was performed, so no need to test .conf.tmp
       * In all other cases the gauge configuration still needs to be written out here. */
      if (!(return_check && accept)) {
        xlfInfo = construct_paramsXlfInfo(plaquette_energy/(6.*VOLUME*g_nproc), trajectory_counter);
        if (g_proc_id == 0) {
          fprintf(stdout, "# Writing gauge field to %s.\n", tmp_filename);
        }
        if((status = write_gauge_field( tmp_filename, gauge_precision_write_flag, xlfInfo) != 0 )) {
          /* Writing the gauge field failed directly */
          fprintf(stderr, "Error %d while writing gauge field to %s\nAborting...\n", status, tmp_filename);
          exit(-2);
        }
        if (!g_disable_IO_checks) {
#ifdef HAVE_LIBLEMON
          /* Read gauge field back to verify the writeout */
          if (g_proc_id == 0) {
            fprintf(stdout, "# Write completed, verifying write...\n");
          }
          if( (status = read_gauge_field(tmp_filename)) != 0) {
            fprintf(stderr, "WARNING, writeout of %s returned no error, but verification discovered errors.\n", tmp_filename);
            fprintf(stderr, "Potential disk or MPI I/O error. Aborting...\n");
            exit(-3);
          }
          if (g_proc_id == 0) {
            fprintf(stdout, "# Write successfully verified.\n");
          }
#else
          if (g_proc_id == 0) {
            fprintf(stdout, "# Write completed successfully.\n");
          }
#endif
        }
        free(xlfInfo);
      }
      /* Now move .conf.tmp into place */
      if(g_proc_id == 0) {
        fprintf(stdout, "# Renaming %s to %s.\n", tmp_filename, gauge_filename);
        if (rename(tmp_filename, gauge_filename) != 0) {
          /* Errno can be inspected here for more descriptive error reporting */
          fprintf(stderr, "Error while trying to rename temporary file %s to %s. Unable to proceed.\n", tmp_filename, gauge_filename);
          exit(-2);
        }
        countfile = fopen(nstore_filename, "w");
        fprintf(countfile, "%d %d %s\n", nstore, trajectory_counter+1, gauge_filename);
        fclose(countfile);
      }
    }

    /* online measurements */
    for(imeas = 0; imeas < no_measurements; imeas++){
      meas = &measurement_list[imeas];
      if(trajectory_counter%meas->freq == 0){
        if (g_proc_id == 0) {
          fprintf(stdout, "#\n# Beginning online measurement.\n");
        }
        meas->measurefunc(trajectory_counter, imeas, even_odd_flag);
      }
    }

    if(g_proc_id == 0) {
      verbose = 1;
    }
    ix = reread_input("hmc.reread");
    if(g_proc_id == 0) {
      verbose = 0;
    }

#ifdef MPI
    MPI_Barrier(MPI_COMM_WORLD);
#endif
    if(ix == 0 && g_proc_id == 0) {
      countfile = fopen("history_hmc_tm", "a");
      fprintf(countfile, "# Changed input parameters according to hmc.reread: measurement %d of %d\n", j, Nmeas);
      fclose(countfile);
      printf("# Changed input parameters according to hmc.reread (see stdout): measurement %d of %d\n", j, Nmeas);
      remove("hmc.reread");
    }
    trajectory_counter++;
  } /* end of loop over trajectories */

  if(g_proc_id == 0 && Nmeas != 0) {
    printf("# Acceptance rate was %3.2f percent, %d out of %d trajectories accepted.\n", 100.*(double)Rate/(double)Nmeas, Rate, Nmeas);
    fflush(stdout);
    parameterfile = fopen(parameterfilename, "a");
    fprintf(parameterfile, "# Acceptance rate was %3.2f percent, %d out of %d trajectories accepted.\n", 100.*(double)Rate/(double)Nmeas, Rate, Nmeas);
    fclose(parameterfile);
  }

#ifdef MPI
  MPI_Finalize();
#endif
#ifdef OMP
  free_omp_accumulators();
#endif
  free_gauge_tmp();
  free_gauge_field();
  free_geometry_indices();
  free_spinor_field();
  free_moment_field();
  free_monomials();
  if(g_running_phmc) {
    free_bispinor_field();
    free_chi_spinor_field();
  }

  return(0);
#ifdef _KOJAK_INST
#pragma pomp inst end(main)
#endif
}
Пример #10
0
int update_tm(double *plaquette_energy, double *rectangle_energy, 
              char * filename, const int return_check, const int acctest, 
	      const int traj_counter) {

  su3 *v, *w;
  static int ini_g_tmp = 0;
  int accept, i=0, j=0, iostatus=0;

  double yy[1];
  double dh, expmdh, ret_dh=0., ret_gauge_diff=0., tmp;
  double atime=0., etime=0.;
  double ks = 0., kc = 0., ds, tr, ts, tt;

  char tmp_filename[50];

  /* Energy corresponding to the Gauge part */
  double new_plaquette_energy=0., new_rectangle_energy = 0.;

  /* Energy corresponding to the Momenta part */
  double enep=0., enepx=0., ret_enep = 0.;

  /* Energy corresponding to the pseudo fermion part(s) */
  FILE * datafile=NULL, * ret_check_file=NULL;
  hamiltonian_field_t hf;
  paramsXlfInfo *xlfInfo;

  hf.gaugefield = g_gauge_field;
  hf.momenta = moment;
  hf.derivative = df0;
  hf.update_gauge_copy = g_update_gauge_copy;
  hf.update_gauge_energy = g_update_gauge_energy;
  hf.update_rectangle_energy = g_update_rectangle_energy;
  hf.traj_counter = traj_counter;
  integrator_set_fields(&hf);

  strcpy(tmp_filename, ".conf.tmp");
  if(ini_g_tmp == 0) {
    ini_g_tmp = init_gauge_tmp(VOLUME);
    if(ini_g_tmp != 0) {
      exit(-1);
    }
    ini_g_tmp = 1;
  }
  atime = gettime();

  /*
   *  here the momentum and spinor fields are initialized 
   *  and their respective actions are calculated
   */

  /* 
   *  copy the gauge field to gauge_tmp 
   */
#ifdef OMP
#pragma omp parallel for private(w,v)
#endif
  for(int ix=0;ix<VOLUME;ix++) { 
    for(int mu=0;mu<4;mu++) {
      v=&hf.gaugefield[ix][mu];
      w=&gauge_tmp[ix][mu];
      _su3_assign(*w,*v);
    }
  }

  /* heatbath for all monomials */
  for(i = 0; i < Integrator.no_timescales; i++) {
    for(j = 0; j < Integrator.no_mnls_per_ts[i]; j++) {
      monomial_list[ Integrator.mnls_per_ts[i][j] ].hbfunction(Integrator.mnls_per_ts[i][j], &hf);
    }
  }

  if(Integrator.monitor_forces) monitor_forces(&hf);
  /* initialize the momenta  */
  enep = random_su3adj_field(reproduce_randomnumber_flag, hf.momenta);

  g_sloppy_precision = 1;

  /* run the trajectory */
  if(Integrator.n_int[Integrator.no_timescales-1] > 0) {
    Integrator.integrate[Integrator.no_timescales-1](Integrator.tau, 
						     Integrator.no_timescales-1, 1);
  }

  g_sloppy_precision = 0;

  /* compute the final energy contributions for all monomials */
  dh = 0.;
  for(i = 0; i < Integrator.no_timescales; i++) {
    for(j = 0; j < Integrator.no_mnls_per_ts[i]; j++) {
      dh += monomial_list[ Integrator.mnls_per_ts[i][j] ].accfunction(Integrator.mnls_per_ts[i][j], &hf);
    }
  }

  enepx = moment_energy(hf.momenta);

  if (!bc_flag) { /* if PBC */
    new_plaquette_energy = measure_gauge_action( (const su3**) hf.gaugefield);
    if(g_rgi_C1 > 0. || g_rgi_C1 < 0.) {
      new_rectangle_energy = measure_rectangles( (const su3**) hf.gaugefield);
    }
  }
  if(g_proc_id == 0 && g_debug_level > 3) printf("called moment_energy: dh = %1.10e\n", (enepx - enep));
  /* Compute the energy difference */
  dh = dh + (enepx - enep);
  if(g_proc_id == 0 && g_debug_level > 3) {
    printf("called momenta_acc dH = %e\n", (enepx - enep));
  }
  expmdh = exp(-dh);
  /* the random number is only taken at node zero and then distributed to 
     the other sites */
  ranlxd(yy,1);
  if(g_proc_id==0) {
#ifdef MPI
    for(i = 1; i < g_nproc; i++) {
      MPI_Send(&yy[0], 1, MPI_DOUBLE, i, 31, MPI_COMM_WORLD);
    }
#endif
  }
#ifdef MPI
  else{
    MPI_Recv(&yy[0], 1, MPI_DOUBLE, 0, 31, MPI_COMM_WORLD, &status);
  }
#endif

  accept = (!acctest | (expmdh > yy[0]));
  if(g_proc_id == 0) {
    fprintf(stdout, "# Trajectory is %saccepted.\n", (accept ? "" : "not "));
  }
  /* Here a reversibility test is performed */
  /* The trajectory is integrated back      */
  if(return_check) {
    if(g_proc_id == 0) {
      fprintf(stdout, "# Performing reversibility check.\n");
    }
    if(accept) {
      /* save gauge file to disk before performing reversibility check */
      xlfInfo = construct_paramsXlfInfo((*plaquette_energy)/(6.*VOLUME*g_nproc), -1);
      // Should write this to temporary file first, and then check
      if(g_proc_id == 0 && g_debug_level > 0) {
        fprintf(stdout, "# Writing gauge field to file %s.\n", tmp_filename);
      }
      if((iostatus = write_gauge_field( tmp_filename, 64, xlfInfo) != 0 )) {
        /* Writing failed directly */
        fprintf(stderr, "Error %d while writing gauge field to %s\nAborting...\n", iostatus, tmp_filename);
        exit(-2);
      }
      /* There is double writing of the gauge field, also in hmc_tm.c in this case */
      /* No reading back check needed here, as reading back is done further down */
      if(g_proc_id == 0 && g_debug_level > 0) {
        fprintf(stdout, "# Writing done.\n");
      }
      free(xlfInfo);
    }
    g_sloppy_precision = 1;
    /* run the trajectory back */
    Integrator.integrate[Integrator.no_timescales-1](-Integrator.tau, 
                         Integrator.no_timescales-1, 1);

    g_sloppy_precision = 0;

    /*   compute the energy contributions from the pseudo-fermions  */
    ret_dh = 0.;
    for(i = 0; i < Integrator.no_timescales; i++) {
      for(j = 0; j < Integrator.no_mnls_per_ts[i]; j++) {
        ret_dh += monomial_list[ Integrator.mnls_per_ts[i][j] ].accfunction(Integrator.mnls_per_ts[i][j], &hf);
      }
    }

    ret_enep = moment_energy(hf.momenta);

    /* Compute the energy difference */
    ret_dh += ret_enep - enep ;

    /* Compute Differences in the fields */
    ks = 0.;
    kc = 0.;

#ifdef OMP
#pragma omp parallel private(w,v,tt,tr,ts,ds,ks,kc)
    {
    int thread_num = omp_get_thread_num();
#endif
    su3 ALIGN v0;
#ifdef OMP
#pragma omp for
#endif
    for(int ix = 0; ix < VOLUME; ++ix)
    {
      for(int mu = 0; mu < 4; ++mu)
      {
        v=&hf.gaugefield[ix][mu];
        w=&gauge_tmp[ix][mu];
	_su3_minus_su3(v0, *v, *w);
	_su3_square_norm(ds, v0);

        tr = sqrt(ds) + kc;
        ts = tr + ks;
        tt = ts-ks;
        ks = ts;
        kc = tr-tt;
      }
    }
    kc=ks+kc;
#ifdef OMP
    g_omp_acc_re[thread_num] = kc;
      
    } /* OpenMP parallel section closing brace */

    /* sum up contributions from thread-local kahan summations */
    for(int k = 0; k < omp_num_threads; ++k)
      ret_gauge_diff += g_omp_acc_re[k];
#else
    ret_gauge_diff = kc;
#endif

#ifdef MPI
    tmp = ret_gauge_diff;
    MPI_Reduce(&tmp, &ret_gauge_diff, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
#endif
    /* compute the total H */
    tmp = enep;
    for(i = 0; i < Integrator.no_timescales; i++) {
      for(j = 0; j < Integrator.no_mnls_per_ts[i]; j++) {
        tmp += monomial_list[ Integrator.mnls_per_ts[i][j] ].energy0;
      }
    }
    /* Output */
    if(g_proc_id == 0) {
      ret_check_file = fopen("return_check.data","a");
      fprintf(ret_check_file,"ddh = %1.4e ddU= %1.4e ddh/H = %1.4e\n",
              ret_dh, ret_gauge_diff/4./((double)(VOLUME*g_nproc))/3., ret_dh/tmp);
      fclose(ret_check_file);
    }

    if(accept) {
      /* Read back gauge field */
      if(g_proc_id == 0 && g_debug_level > 0) {
        fprintf(stdout, "# Trying to read gauge field from file %s.\n", tmp_filename);
      }

      if((iostatus = read_gauge_field(tmp_filename) != 0)) {
        fprintf(stderr, "Error %d while reading gauge field from %s\nAborting...\n", iostatus, tmp_filename);
        exit(-2);
      }
      if(g_proc_id == 0 && g_debug_level > 0) {
        fprintf(stdout, "# Reading done.\n");
      }
    }
    if(g_proc_id == 0) {
      fprintf(stdout, "# Reversibility check done.\n");
    }
  } /* end of reversibility check */

  if(accept) {
    *plaquette_energy = new_plaquette_energy;
    *rectangle_energy = new_rectangle_energy;
    /* put the links back to SU(3) group */
    if (!bc_flag) { /* periodic boundary conditions */
#ifdef OMP
#pragma omp parallel for private(v)
#endif
      for(int ix=0;ix<VOLUME;ix++) { 
        for(int mu=0;mu<4;mu++) { 
          v=&hf.gaugefield[ix][mu];
          restoresu3_in_place(v); 
        }
      }
    }
  }
  else { /* reject: copy gauge_tmp to hf.gaugefield */
#ifdef OMP
#pragma omp parallel for private(w) private(v)
#endif
    for(int ix=0;ix<VOLUME;ix++) {
      for(int mu=0;mu<4;mu++){
        v=&hf.gaugefield[ix][mu];
        w=&gauge_tmp[ix][mu];
        _su3_assign(*v,*w);
      }
    }
  }
  hf.update_gauge_copy = 1;
  g_update_gauge_copy = 1;
  hf.update_gauge_energy = 1;
  g_update_gauge_energy = 1;
  hf.update_rectangle_energy = 1;
  g_update_rectangle_energy = 1;
#ifdef MPI
  xchange_gauge(hf.gaugefield);
#endif
  etime=gettime();

  /* printing data in the .data file */
  if(g_proc_id==0) {
    datafile = fopen(filename, "a");
    if (!bc_flag) { /* if Periodic Boundary Conditions */
      fprintf(datafile, "%.8d %14.12f %14.12f %e ", traj_counter,
              (*plaquette_energy)/(6.*VOLUME*g_nproc), dh, expmdh);
    }
    for(i = 0; i < Integrator.no_timescales; i++) {
      for(j = 0; j < Integrator.no_mnls_per_ts[i]; j++) {
        if(monomial_list[ Integrator.mnls_per_ts[i][j] ].type != GAUGE
	   && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != SFGAUGE 
	   && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != NDPOLY
	   && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != NDCLOVER
	   && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != CLOVERNDTRLOG
	   && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != CLOVERTRLOG ) {
          fprintf(datafile,"%d %d ",  monomial_list[ Integrator.mnls_per_ts[i][j] ].iter0, 
                  monomial_list[ Integrator.mnls_per_ts[i][j] ].iter1);
        }
      }
    }
    fprintf(datafile, "%d %e", accept, etime-atime);
    if(g_rgi_C1 > 0. || g_rgi_C1 < 0) {
      fprintf(datafile, " %e", (*rectangle_energy)/(12*VOLUME*g_nproc));
    }
    fprintf(datafile, "\n");
    fflush(datafile);
    fclose(datafile);
  }
  return(accept);
}
Пример #11
0
int main(int argc, char *argv[])
{
  FILE *parameterfile = NULL;
  int j, i, ix = 0, isample = 0, op_id = 0;
  char datafilename[206];
  char parameterfilename[206];
  char conf_filename[50];
  char * input_filename = NULL;
  char * filename = NULL;
  double plaquette_energy;
  struct stout_parameters params_smear;

#ifdef _KOJAK_INST
#pragma pomp inst init
#pragma pomp inst begin(main)
#endif

#if (defined SSE || defined SSE2 || SSE3)
  signal(SIGILL, &catch_ill_inst);
#endif

  DUM_DERI = 8;
  DUM_MATRIX = DUM_DERI + 5;
  NO_OF_SPINORFIELDS = DUM_MATRIX + 4;

  //4 extra fields (corresponding to DUM_MATRIX+0..5) for deg. and ND matrix mult.  
  NO_OF_SPINORFIELDS_32 = 6;

  verbose = 0;
  g_use_clover_flag = 0;


  process_args(argc,argv,&input_filename,&filename);
  set_default_filenames(&input_filename, &filename);

  init_parallel_and_read_input(argc, argv, input_filename);

  /* this DBW2 stuff is not needed for the inversion ! */
  if (g_dflgcr_flag == 1) {
    even_odd_flag = 0;
  }
  g_rgi_C1 = 0;
  if (Nsave == 0) {
    Nsave = 1;
  }

  if (g_running_phmc) {
    NO_OF_SPINORFIELDS = DUM_MATRIX + 8;
  }

  tmlqcd_mpi_init(argc, argv);

  g_dbw2rand = 0;

  /* starts the single and double precision random number */
  /* generator                                            */
  start_ranlux(rlxd_level, random_seed^nstore);

  /* we need to make sure that we don't have even_odd_flag = 1 */
  /* if any of the operators doesn't use it                    */
  /* in this way even/odd can still be used by other operators */
  for(j = 0; j < no_operators; j++) if(!operator_list[j].even_odd_flag) even_odd_flag = 0;

#ifndef TM_USE_MPI
  g_dbw2rand = 0;
#endif

#ifdef _GAUGE_COPY
  j = init_gauge_field(VOLUMEPLUSRAND, 1);
  j += init_gauge_field_32(VOLUMEPLUSRAND, 1);
#else
  j = init_gauge_field(VOLUMEPLUSRAND, 0);
  j += init_gauge_field_32(VOLUMEPLUSRAND, 0);  
#endif
 
  if (j != 0) {
    fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n");
    exit(-1);
  }
  j = init_geometry_indices(VOLUMEPLUSRAND);
  if (j != 0) {
    fprintf(stderr, "Not enough memory for geometry indices! Aborting...\n");
    exit(-1);
  }
  if (no_monomials > 0) {
    if (even_odd_flag) {
      j = init_monomials(VOLUMEPLUSRAND / 2, even_odd_flag);
    }
    else {
      j = init_monomials(VOLUMEPLUSRAND, even_odd_flag);
    }
    if (j != 0) {
      fprintf(stderr, "Not enough memory for monomial pseudo fermion fields! Aborting...\n");
      exit(-1);
    }
  }
  if (even_odd_flag) {
    j = init_spinor_field(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS);
    j += init_spinor_field_32(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS_32);   
  }
  else {
    j = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS);
    j += init_spinor_field_32(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS_32);   
  }
  if (j != 0) {
    fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n");
    exit(-1);
  }

  if (g_running_phmc) {
    j = init_chi_spinor_field(VOLUMEPLUSRAND / 2, 20);
    if (j != 0) {
      fprintf(stderr, "Not enough memory for PHMC Chi fields! Aborting...\n");
      exit(-1);
    }
  }

  g_mu = g_mu1;

  if (g_cart_id == 0) {
    /*construct the filenames for the observables and the parameters*/
    strncpy(datafilename, filename, 200);
    strcat(datafilename, ".data");
    strncpy(parameterfilename, filename, 200);
    strcat(parameterfilename, ".para");

    parameterfile = fopen(parameterfilename, "w");
    write_first_messages(parameterfile, "invert", git_hash);
    fclose(parameterfile);
  }

  /* define the geometry */
  geometry();

  /* define the boundary conditions for the fermion fields */
  boundary(g_kappa);

  phmc_invmaxev = 1.;

  init_operators();

  /* list and initialize measurements*/
  if(g_proc_id == 0) {
    printf("\n");
    for(int j = 0; j < no_measurements; j++) {
      printf("# measurement id %d, type = %d\n", j, measurement_list[j].type);
    }
  }
  init_measurements();  

  /* this could be maybe moved to init_operators */
#ifdef _USE_HALFSPINOR
  j = init_dirac_halfspinor();
  if (j != 0) {
    fprintf(stderr, "Not enough memory for halffield! Aborting...\n");
    exit(-1);
  }
  /* for mixed precision solvers, the 32 bit halfspinor field must always be there */
  j = init_dirac_halfspinor32();
  if (j != 0)
  {
    fprintf(stderr, "Not enough memory for 32-bit halffield! Aborting...\n");
    exit(-1);
  }
#  if (defined _PERSISTENT)
  if (even_odd_flag)
    init_xchange_halffield();
#  endif
#endif

  for (j = 0; j < Nmeas; j++) {
    sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore);
    if (g_cart_id == 0) {
      printf("#\n# Trying to read gauge field from file %s in %s precision.\n",
            conf_filename, (gauge_precision_read_flag == 32 ? "single" : "double"));
      fflush(stdout);
    }
    if( (i = read_gauge_field(conf_filename,g_gauge_field)) !=0) {
      fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", i, conf_filename);
      exit(-2);
    }


    if (g_cart_id == 0) {
      printf("# Finished reading gauge field.\n");
      fflush(stdout);
    }
#ifdef TM_USE_MPI
    xchange_gauge(g_gauge_field);
#endif
    /*Convert to a 32 bit gauge field, after xchange*/
    convert_32_gauge_field(g_gauge_field_32, g_gauge_field, VOLUMEPLUSRAND);
    /*compute the energy of the gauge field*/
    plaquette_energy = measure_plaquette( (const su3**) g_gauge_field);

    if (g_cart_id == 0) {
      printf("# The computed plaquette value is %e.\n", plaquette_energy / (6.*VOLUME*g_nproc));
      fflush(stdout);
    }

    if (use_stout_flag == 1){
      params_smear.rho = stout_rho;
      params_smear.iterations = stout_no_iter;
/*       if (stout_smear((su3_tuple*)(g_gauge_field[0]), &params_smear, (su3_tuple*)(g_gauge_field[0])) != 0) */
/*         exit(1) ; */
      g_update_gauge_copy = 1;
      plaquette_energy = measure_plaquette( (const su3**) g_gauge_field);

      if (g_cart_id == 0) {
        printf("# The plaquette value after stouting is %e\n", plaquette_energy / (6.*VOLUME*g_nproc));
        fflush(stdout);
      }
    }

    /* if any measurements are defined in the input file, do them here */
    measurement * meas;
    for(int imeas = 0; imeas < no_measurements; imeas++){
      meas = &measurement_list[imeas];
      if (g_proc_id == 0) {
        fprintf(stdout, "#\n# Beginning online measurement.\n");
      }
      meas->measurefunc(nstore, imeas, even_odd_flag);
    }

    if (reweighting_flag == 1) {
      reweighting_factor(reweighting_samples, nstore);
    }

    /* Compute minimal eigenvalues, if wanted */
    if (compute_evs != 0) {
      eigenvalues(&no_eigenvalues, 5000, eigenvalue_precision,
                  0, compute_evs, nstore, even_odd_flag);
    }
    if (phmc_compute_evs != 0) {
#ifdef TM_USE_MPI
      MPI_Finalize();
#endif
      return(0);
    }

    /* Compute the mode number or topological susceptibility using spectral projectors, if wanted*/
    if(compute_modenumber != 0 || compute_topsus !=0){
      invert_compute_modenumber(); 
    }

    //  set up blocks if Deflation is used 
    if (g_dflgcr_flag) 
      init_blocks(nblocks_t, nblocks_x, nblocks_y, nblocks_z);
    
    if(SourceInfo.type == SRC_TYPE_VOL || SourceInfo.type == SRC_TYPE_PION_TS || SourceInfo.type == SRC_TYPE_GEN_PION_TS) {
      index_start = 0;
      index_end = 1;
    }

    g_precWS=NULL;
    if(use_preconditioning == 1){
      /* todo load fftw wisdom */
#if (defined HAVE_FFTW ) && !( defined TM_USE_MPI)
      loadFFTWWisdom(g_spinor_field[0],g_spinor_field[1],T,LX);
#else
      use_preconditioning=0;
#endif
    }

    if (g_cart_id == 0) {
      fprintf(stdout, "#\n"); /*Indicate starting of the operator part*/
    }
    for(op_id = 0; op_id < no_operators; op_id++) {
      boundary(operator_list[op_id].kappa);
      g_kappa = operator_list[op_id].kappa; 
      g_mu = operator_list[op_id].mu;
      g_c_sw = operator_list[op_id].c_sw;
      // DFLGCR and DFLFGMRES
      if(operator_list[op_id].solver == DFLGCR || operator_list[op_id].solver == DFLFGMRES) {
        generate_dfl_subspace(g_N_s, VOLUME, reproduce_randomnumber_flag);
      }

      if(use_preconditioning==1 && PRECWSOPERATORSELECT[operator_list[op_id].solver]!=PRECWS_NO ){
        printf("# Using preconditioning with treelevel preconditioning operator: %s \n",
              precWSOpToString(PRECWSOPERATORSELECT[operator_list[op_id].solver]));
        /* initial preconditioning workspace */
        operator_list[op_id].precWS=(spinorPrecWS*)malloc(sizeof(spinorPrecWS));
        spinorPrecWS_Init(operator_list[op_id].precWS,
                  operator_list[op_id].kappa,
                  operator_list[op_id].mu/2./operator_list[op_id].kappa,
                  -(0.5/operator_list[op_id].kappa-4.),
                  PRECWSOPERATORSELECT[operator_list[op_id].solver]);
        g_precWS = operator_list[op_id].precWS;

        if(PRECWSOPERATORSELECT[operator_list[op_id].solver] == PRECWS_D_DAGGER_D) {
          fitPrecParams(op_id);
        }
      }

      for(isample = 0; isample < no_samples; isample++) {
        for (ix = index_start; ix < index_end; ix++) {
          if (g_cart_id == 0) {
            fprintf(stdout, "#\n"); /*Indicate starting of new index*/
          }
          /* we use g_spinor_field[0-7] for sources and props for the moment */
          /* 0-3 in case of 1 flavour  */
          /* 0-7 in case of 2 flavours */
          prepare_source(nstore, isample, ix, op_id, read_source_flag, source_location, random_seed);
          //randmize initial guess for eigcg if needed-----experimental
          if( (operator_list[op_id].solver == INCREIGCG) && (operator_list[op_id].solver_params.eigcg_rand_guess_opt) ){ //randomize the initial guess
              gaussian_volume_source( operator_list[op_id].prop0, operator_list[op_id].prop1,isample,ix,0); //need to check this
          } 
          operator_list[op_id].inverter(op_id, index_start, 1);
        }
      }


      if(use_preconditioning==1 && operator_list[op_id].precWS!=NULL ){
        /* free preconditioning workspace */
        spinorPrecWS_Free(operator_list[op_id].precWS);
        free(operator_list[op_id].precWS);
      }

      if(operator_list[op_id].type == OVERLAP){
        free_Dov_WS();
      }

    }
    nstore += Nsave;
  }

#ifdef TM_USE_OMP
  free_omp_accumulators();
#endif
  free_blocks();
  free_dfl_subspace();
  free_gauge_field();
  free_gauge_field_32();
  free_geometry_indices();
  free_spinor_field();
  free_spinor_field_32();  
  free_moment_field();
  free_chi_spinor_field();
  free(filename);
  free(input_filename);
  free(SourceInfo.basename);
  free(PropInfo.basename);
#ifdef TM_USE_QUDA
  _endQuda();
#endif
#ifdef TM_USE_MPI
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Finalize();
#endif
  return(0);
#ifdef _KOJAK_INST
#pragma pomp inst end(main)
#endif
}
Пример #12
0
int main(int argc, char **argv)
{

  for (int i = 1; i < argc; i++){
    if(process_command_line_option(argc, argv, &i) == 0){
      continue;
    } 
    printfQuda("ERROR: Invalid option:%s\n", argv[i]);
    usage(argv);
  }

  if (prec_sloppy == QUDA_INVALID_PRECISION){
    prec_sloppy = prec;
  }
  if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID){
    link_recon_sloppy = link_recon;
  }

  // initialize QMP/MPI, QUDA comms grid and RNG (test_util.cpp)
  initComms(argc, argv, gridsize_from_cmdline);

  display_test_info();

  // *** QUDA parameters begin here.

  if (dslash_type != QUDA_WILSON_DSLASH &&
      dslash_type != QUDA_CLOVER_WILSON_DSLASH &&
      dslash_type != QUDA_TWISTED_MASS_DSLASH &&
      dslash_type != QUDA_DOMAIN_WALL_4D_DSLASH &&
      dslash_type != QUDA_MOBIUS_DWF_DSLASH &&
      dslash_type != QUDA_TWISTED_CLOVER_DSLASH &&
      dslash_type != QUDA_DOMAIN_WALL_DSLASH) {
    printfQuda("dslash_type %d not supported\n", dslash_type);
    exit(0);
  }

  QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION;
  QudaPrecision cuda_prec = prec;
  QudaPrecision cuda_prec_sloppy = prec_sloppy;
  QudaPrecision cuda_prec_precondition = QUDA_HALF_PRECISION;

  QudaGaugeParam gauge_param = newQudaGaugeParam();
  QudaInvertParam inv_param = newQudaInvertParam();
 
  double kappa5;

  gauge_param.X[0] = xdim;
  gauge_param.X[1] = ydim;
  gauge_param.X[2] = zdim;
  gauge_param.X[3] = tdim;
  inv_param.Ls = 1;

  gauge_param.anisotropy = 1.0;
  gauge_param.type = QUDA_WILSON_LINKS;
  gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
  gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
  
  gauge_param.cpu_prec = cpu_prec;
  gauge_param.cuda_prec = cuda_prec;
  gauge_param.reconstruct = link_recon;
  gauge_param.cuda_prec_sloppy = cuda_prec_sloppy;
  gauge_param.reconstruct_sloppy = link_recon_sloppy;
  gauge_param.cuda_prec_precondition = cuda_prec_precondition;
  gauge_param.reconstruct_precondition = link_recon_sloppy;
  gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;

  inv_param.dslash_type = dslash_type;

  inv_param.mass = mass;
  inv_param.kappa = 1.0 / (2.0 * (1 + 3/gauge_param.anisotropy + mass));

  if (dslash_type == QUDA_TWISTED_MASS_DSLASH || dslash_type == QUDA_TWISTED_CLOVER_DSLASH) {
    inv_param.mu = 0.12;
    inv_param.epsilon = 0.1385;
    inv_param.twist_flavor = twist_flavor;
    inv_param.Ls = (inv_param.twist_flavor == QUDA_TWIST_NONDEG_DOUBLET) ? 2 : 1;
  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH ||
             dslash_type == QUDA_DOMAIN_WALL_4D_DSLASH) {
    inv_param.m5 = -1.8;
    kappa5 = 0.5/(5 + inv_param.m5);  
    inv_param.Ls = Lsdim;
  } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
    inv_param.m5 = -1.8;
    kappa5 = 0.5/(5 + inv_param.m5);  
    inv_param.Ls = Lsdim;
    for(int k = 0; k < Lsdim; k++)
    {
      // b5[k], c[k] values are chosen for arbitrary values,
      // but the difference of them are same as 1.0
      inv_param.b_5[k] = 1.452;
      inv_param.c_5[k] = 0.452;
    }
  }

  // offsets used only by multi-shift solver
  inv_param.num_offset = 4;
  double offset[4] = {0.01, 0.02, 0.03, 0.04};
  for (int i=0; i<inv_param.num_offset; i++) inv_param.offset[i] = offset[i];

  inv_param.inv_type = inv_type;
  if (inv_param.dslash_type == QUDA_TWISTED_MASS_DSLASH || dslash_type == QUDA_TWISTED_CLOVER_DSLASH) {
    inv_param.solution_type = QUDA_MAT_SOLUTION;
  } else {
    inv_param.solution_type = multishift ? QUDA_MATPCDAG_MATPC_SOLUTION : QUDA_MATPC_SOLUTION;
  }
  inv_param.matpc_type = matpc_type;

  inv_param.dagger = QUDA_DAG_NO;
  inv_param.mass_normalization = normalization;
  inv_param.solver_normalization = QUDA_DEFAULT_NORMALIZATION;

  if (dslash_type == QUDA_DOMAIN_WALL_DSLASH || 
      dslash_type == QUDA_DOMAIN_WALL_4D_DSLASH ||
      dslash_type == QUDA_MOBIUS_DWF_DSLASH ||
      dslash_type == QUDA_TWISTED_MASS_DSLASH || 
      dslash_type == QUDA_TWISTED_CLOVER_DSLASH || 
      multishift || inv_type == QUDA_CG_INVERTER) {
    inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
  } else {
    inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
  }

  inv_param.pipeline = 0;

  inv_param.Nsteps = 2;
  inv_param.gcrNkrylov = 10;
  inv_param.tol = 1e-7;
  inv_param.tol_restart = 1e-3; //now theoretical background for this parameter... 
#if __COMPUTE_CAPABILITY__ >= 200
  // require both L2 relative and heavy quark residual to determine convergence
  inv_param.residual_type = static_cast<QudaResidualType>(QUDA_L2_RELATIVE_RESIDUAL | QUDA_HEAVY_QUARK_RESIDUAL);
  inv_param.tol_hq = 1e-3; // specify a tolerance for the residual for heavy quark residual
#else
  // Pre Fermi architecture only supports L2 relative residual norm
  inv_param.residual_type = QUDA_L2_RELATIVE_RESIDUAL;
#endif
  // these can be set individually
  for (int i=0; i<inv_param.num_offset; i++) {
    inv_param.tol_offset[i] = inv_param.tol;
    inv_param.tol_hq_offset[i] = inv_param.tol_hq;
  }
  inv_param.maxiter = 10000;
  inv_param.reliable_delta = 1e-1;
  inv_param.use_sloppy_partial_accumulator = 0;
  inv_param.max_res_increase = 1;

  // domain decomposition preconditioner parameters
  inv_param.inv_type_precondition = precon_type;
    
  inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
  inv_param.precondition_cycle = 1;
  inv_param.tol_precondition = 1e-1;
  inv_param.maxiter_precondition = 10;
  inv_param.verbosity_precondition = QUDA_SILENT;
  inv_param.cuda_prec_precondition = cuda_prec_precondition;
  inv_param.omega = 1.0;

  inv_param.cpu_prec = cpu_prec;
  inv_param.cuda_prec = cuda_prec;
  inv_param.cuda_prec_sloppy = cuda_prec_sloppy;
  inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
  inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
  inv_param.dirac_order = QUDA_DIRAC_ORDER;

  inv_param.input_location = QUDA_CPU_FIELD_LOCATION;
  inv_param.output_location = QUDA_CPU_FIELD_LOCATION;

  inv_param.tune = tune ? QUDA_TUNE_YES : QUDA_TUNE_NO;

  gauge_param.ga_pad = 0; // 24*24*24/2;
  inv_param.sp_pad = 0; // 24*24*24/2;
  inv_param.cl_pad = 0; // 24*24*24/2;

  // For multi-GPU, ga_pad must be large enough to store a time-slice
#ifdef MULTI_GPU
  int x_face_size = gauge_param.X[1]*gauge_param.X[2]*gauge_param.X[3]/2;
  int y_face_size = gauge_param.X[0]*gauge_param.X[2]*gauge_param.X[3]/2;
  int z_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[3]/2;
  int t_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[2]/2;
  int pad_size =MAX(x_face_size, y_face_size);
  pad_size = MAX(pad_size, z_face_size);
  pad_size = MAX(pad_size, t_face_size);
  gauge_param.ga_pad = pad_size;    
#endif

  if (dslash_type == QUDA_CLOVER_WILSON_DSLASH || dslash_type == QUDA_TWISTED_CLOVER_DSLASH) {
    inv_param.clover_cpu_prec = cpu_prec;
    inv_param.clover_cuda_prec = cuda_prec;
    inv_param.clover_cuda_prec_sloppy = cuda_prec_sloppy;
    inv_param.clover_cuda_prec_precondition = cuda_prec_precondition;
    inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
    inv_param.clover_coeff = 1.5*inv_param.kappa;
  }

  inv_param.verbosity = QUDA_VERBOSE;

  // *** Everything between here and the call to initQuda() is
  // *** application-specific.

  // set parameters for the reference Dslash, and prepare fields to be loaded
  if (dslash_type == QUDA_DOMAIN_WALL_DSLASH ||
      dslash_type == QUDA_DOMAIN_WALL_4D_DSLASH ||
      dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
    dw_setDims(gauge_param.X, inv_param.Ls);
  } else {
    setDims(gauge_param.X);
  }

  setSpinorSiteSize(24);

  size_t gSize = (gauge_param.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
  size_t sSize = (inv_param.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);

  void *gauge[4], *clover_inv=0, *clover=0;

  for (int dir = 0; dir < 4; dir++) {
    gauge[dir] = malloc(V*gaugeSiteSize*gSize);
  }

  if (strcmp(latfile,"")) {  // load in the command line supplied gauge field
    read_gauge_field(latfile, gauge, gauge_param.cpu_prec, gauge_param.X, argc, argv);
    construct_gauge_field(gauge, 2, gauge_param.cpu_prec, &gauge_param);
  } else { // else generate a random SU(3) field
    construct_gauge_field(gauge, 1, gauge_param.cpu_prec, &gauge_param);
  }

  if (dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
    double norm = 0.0; // clover components are random numbers in the range (-norm, norm)
    double diag = 1.0; // constant added to the diagonal

    size_t cSize = (inv_param.clover_cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
    clover_inv = malloc(V*cloverSiteSize*cSize);
    construct_clover_field(clover_inv, norm, diag, inv_param.clover_cpu_prec);

    // The uninverted clover term is only needed when solving the unpreconditioned
    // system or when using "asymmetric" even/odd preconditioning.
    int preconditioned = (inv_param.solve_type == QUDA_DIRECT_PC_SOLVE ||
			  inv_param.solve_type == QUDA_NORMOP_PC_SOLVE);
    int asymmetric = preconditioned &&
                         (inv_param.matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC ||
                          inv_param.matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC);
    if (!preconditioned) {
      clover = clover_inv;
      clover_inv = NULL;
    } else if (asymmetric) { // fake it by using the same random matrix
      clover = clover_inv;   // for both clover and clover_inv
    } else {
      clover = NULL;
    }
  }

  void *spinorIn = malloc(V*spinorSiteSize*sSize*inv_param.Ls);
  void *spinorCheck = malloc(V*spinorSiteSize*sSize*inv_param.Ls);

  void *spinorOut = NULL, **spinorOutMulti = NULL;
  if (multishift) {
    spinorOutMulti = (void**)malloc(inv_param.num_offset*sizeof(void *));
    for (int i=0; i<inv_param.num_offset; i++) {
      spinorOutMulti[i] = malloc(V*spinorSiteSize*sSize*inv_param.Ls);
    }
  } else {
    spinorOut = malloc(V*spinorSiteSize*sSize*inv_param.Ls);
  }

  memset(spinorIn, 0, inv_param.Ls*V*spinorSiteSize*sSize);
  memset(spinorCheck, 0, inv_param.Ls*V*spinorSiteSize*sSize);
  if (multishift) {
    for (int i=0; i<inv_param.num_offset; i++) memset(spinorOutMulti[i], 0, inv_param.Ls*V*spinorSiteSize*sSize);    
  } else {
    memset(spinorOut, 0, inv_param.Ls*V*spinorSiteSize*sSize);
  }

  // create a point source at 0 (in each subvolume...  FIXME)

  // create a point source at 0 (in each subvolume...  FIXME)
  
  if (inv_param.cpu_prec == QUDA_SINGLE_PRECISION) {
    //((float*)spinorIn)[0] = 1.0;
    for (int i=0; i<inv_param.Ls*V*spinorSiteSize; i++) ((float*)spinorIn)[i] = rand() / (float)RAND_MAX;
  } else {
    //((double*)spinorIn)[0] = 1.0;
    for (int i=0; i<inv_param.Ls*V*spinorSiteSize; i++) ((double*)spinorIn)[i] = rand() / (double)RAND_MAX;
  }

  // start the timer
  double time0 = -((double)clock());

  // initialize the QUDA library
  initQuda(device);

  // load the gauge field
  loadGaugeQuda((void*)gauge, &gauge_param);

  // load the clover term, if desired
  if (dslash_type == QUDA_CLOVER_WILSON_DSLASH) loadCloverQuda(clover, clover_inv, &inv_param);

  if (dslash_type == QUDA_TWISTED_CLOVER_DSLASH) loadCloverQuda(NULL, NULL, &inv_param);

  // perform the inversion
  if (multishift) {
    invertMultiShiftQuda(spinorOutMulti, spinorIn, &inv_param);
  } else {
    invertQuda(spinorOut, spinorIn, &inv_param);
  }

  // stop the timer
  time0 += clock();
  time0 /= CLOCKS_PER_SEC;
    
  printfQuda("Device memory used:\n   Spinor: %f GiB\n    Gauge: %f GiB\n", 
	 inv_param.spinorGiB, gauge_param.gaugeGiB);
  if (dslash_type == QUDA_CLOVER_WILSON_DSLASH || dslash_type == QUDA_TWISTED_CLOVER_DSLASH) {
    printfQuda("   Clover: %f GiB\n", inv_param.cloverGiB);
  }
  printfQuda("\nDone: %i iter / %g secs = %g Gflops, total time = %g secs\n", 
	 inv_param.iter, inv_param.secs, inv_param.gflops/inv_param.secs, time0);

  if (multishift) {
    if (inv_param.mass_normalization == QUDA_MASS_NORMALIZATION) {
      errorQuda("Mass normalization not supported for multi-shift solver in invert_test");
    }

    void *spinorTmp = malloc(V*spinorSiteSize*sSize*inv_param.Ls);

    printfQuda("Host residuum checks: \n");
    for(int i=0; i < inv_param.num_offset; i++) {
      ax(0, spinorCheck, V*spinorSiteSize, inv_param.cpu_prec);
      
      if (dslash_type == QUDA_TWISTED_MASS_DSLASH || dslash_type == QUDA_TWISTED_CLOVER_DSLASH) {
	if (inv_param.twist_flavor != QUDA_TWIST_MINUS && inv_param.twist_flavor != QUDA_TWIST_PLUS)
	  errorQuda("Twisted mass solution type not supported");
        tm_matpc(spinorTmp, gauge, spinorOutMulti[i], inv_param.kappa, inv_param.mu, inv_param.twist_flavor, 
                 inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param);
        tm_matpc(spinorCheck, gauge, spinorTmp, inv_param.kappa, inv_param.mu, inv_param.twist_flavor, 
                 inv_param.matpc_type, 1, inv_param.cpu_prec, gauge_param);
      } else if (dslash_type == QUDA_WILSON_DSLASH || dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
        wil_matpc(spinorTmp, gauge, spinorOutMulti[i], inv_param.kappa, inv_param.matpc_type, 0,
                  inv_param.cpu_prec, gauge_param);
        wil_matpc(spinorCheck, gauge, spinorTmp, inv_param.kappa, inv_param.matpc_type, 1,
                  inv_param.cpu_prec, gauge_param);
      } else {
        printfQuda("Domain wall not supported for multi-shift\n");
        exit(-1);
      }

      axpy(inv_param.offset[i], spinorOutMulti[i], spinorCheck, Vh*spinorSiteSize, inv_param.cpu_prec);
      mxpy(spinorIn, spinorCheck, Vh*spinorSiteSize, inv_param.cpu_prec);
      double nrm2 = norm_2(spinorCheck, Vh*spinorSiteSize, inv_param.cpu_prec);
      double src2 = norm_2(spinorIn, Vh*spinorSiteSize, inv_param.cpu_prec);
      double l2r = sqrt(nrm2 / src2);

      printfQuda("Shift %d residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g\n",
		 i, inv_param.tol_offset[i], inv_param.true_res_offset[i], l2r, 
		 inv_param.tol_hq_offset[i], inv_param.true_res_hq_offset[i]);
    }
    free(spinorTmp);

  } else {
    
    if (inv_param.solution_type == QUDA_MAT_SOLUTION) {

      if (dslash_type == QUDA_TWISTED_MASS_DSLASH || dslash_type == QUDA_TWISTED_CLOVER_DSLASH) {
	if(inv_param.twist_flavor == QUDA_TWIST_PLUS || inv_param.twist_flavor == QUDA_TWIST_MINUS)      
	  tm_mat(spinorCheck, gauge, spinorOut, inv_param.kappa, inv_param.mu, inv_param.twist_flavor, 0, inv_param.cpu_prec, gauge_param);
	else
	{
          int tm_offset = V*spinorSiteSize; //12*spinorRef->Volume(); 	  
	  void *evenOut = spinorCheck;
	  void *oddOut  = cpu_prec == sizeof(double) ? (void*)((double*)evenOut + tm_offset): (void*)((float*)evenOut + tm_offset);
    
	  void *evenIn  = spinorOut;
	  void *oddIn   = cpu_prec == sizeof(double) ? (void*)((double*)evenIn + tm_offset): (void*)((float*)evenIn + tm_offset);
    
	  tm_ndeg_mat(evenOut, oddOut, gauge, evenIn, oddIn, inv_param.kappa, inv_param.mu, inv_param.epsilon, 0, inv_param.cpu_prec, gauge_param);	
	}
      } else if (dslash_type == QUDA_WILSON_DSLASH || dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
        wil_mat(spinorCheck, gauge, spinorOut, inv_param.kappa, 0, inv_param.cpu_prec, gauge_param);
      } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH) {
        dw_mat(spinorCheck, gauge, spinorOut, kappa5, inv_param.dagger, inv_param.cpu_prec, gauge_param, inv_param.mass);
//      } else if (dslash_type == QUDA_DOMAIN_WALL_4D_DSLASH) {
//        dw_4d_mat(spinorCheck, gauge, spinorOut, kappa5, inv_param.dagger, inv_param.cpu_prec, gauge_param, inv_param.mass);
//      } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
//        mdw_mat(spinorCheck, gauge, spinorOut, kappa5, inv_param.dagger, inv_param.cpu_prec, gauge_param, inv_param.mass);
      } else {
        printfQuda("Unsupported dslash_type\n");
        exit(-1);
      }
      if (inv_param.mass_normalization == QUDA_MASS_NORMALIZATION) {
        if (dslash_type == QUDA_DOMAIN_WALL_DSLASH || 
            dslash_type == QUDA_DOMAIN_WALL_4D_DSLASH ||
            dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
          ax(0.5/kappa5, spinorCheck, V*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
        } else if (dslash_type == QUDA_TWISTED_MASS_DSLASH && twist_flavor == QUDA_TWIST_NONDEG_DOUBLET) {
          ax(0.5/inv_param.kappa, spinorCheck, 2*V*spinorSiteSize, inv_param.cpu_prec);
	} else {
          ax(0.5/inv_param.kappa, spinorCheck, V*spinorSiteSize, inv_param.cpu_prec);
        }
      }

    } else if(inv_param.solution_type == QUDA_MATPC_SOLUTION) {

      if (dslash_type == QUDA_TWISTED_MASS_DSLASH || dslash_type == QUDA_TWISTED_CLOVER_DSLASH) {
	if (inv_param.twist_flavor != QUDA_TWIST_MINUS && inv_param.twist_flavor != QUDA_TWIST_PLUS)
	  errorQuda("Twisted mass solution type not supported");
        tm_matpc(spinorCheck, gauge, spinorOut, inv_param.kappa, inv_param.mu, inv_param.twist_flavor, 
                 inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param);
      } else if (dslash_type == QUDA_WILSON_DSLASH || dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
        wil_matpc(spinorCheck, gauge, spinorOut, inv_param.kappa, inv_param.matpc_type, 0, 
                  inv_param.cpu_prec, gauge_param);
      } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH) {
        dw_matpc(spinorCheck, gauge, spinorOut, kappa5, inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param, inv_param.mass);
      } else if (dslash_type == QUDA_DOMAIN_WALL_4D_DSLASH) {
        dw_4d_matpc(spinorCheck, gauge, spinorOut, kappa5, inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param, inv_param.mass);
      } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
        double *kappa_b, *kappa_c;
        kappa_b = (double*)malloc(Lsdim*sizeof(double));
        kappa_c = (double*)malloc(Lsdim*sizeof(double));
        for(int xs = 0 ; xs < Lsdim ; xs++)
        {
          kappa_b[xs] = 1.0/(2*(inv_param.b_5[xs]*(4.0 + inv_param.m5) + 1.0));
          kappa_c[xs] = 1.0/(2*(inv_param.c_5[xs]*(4.0 + inv_param.m5) - 1.0));
        }
        mdw_matpc(spinorCheck, gauge, spinorOut, kappa_b, kappa_c, inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param, inv_param.mass, inv_param.b_5, inv_param.c_5);
        free(kappa_b);
        free(kappa_c);
      } else {
        printfQuda("Unsupported dslash_type\n");
        exit(-1);
      }

      if (inv_param.mass_normalization == QUDA_MASS_NORMALIZATION) {
        if (dslash_type == QUDA_DOMAIN_WALL_DSLASH ||
            dslash_type == QUDA_DOMAIN_WALL_4D_DSLASH ||
            dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
          ax(0.25/(kappa5*kappa5), spinorCheck, V*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
        } else {
          ax(0.25/(inv_param.kappa*inv_param.kappa), spinorCheck, Vh*spinorSiteSize, inv_param.cpu_prec);
      
	}
      }

    } else if (inv_param.solution_type == QUDA_MATPCDAG_MATPC_SOLUTION) {

      void *spinorTmp = malloc(V*spinorSiteSize*sSize*inv_param.Ls);

      ax(0, spinorCheck, V*spinorSiteSize, inv_param.cpu_prec);
      
      if (dslash_type == QUDA_TWISTED_MASS_DSLASH || dslash_type == QUDA_TWISTED_CLOVER_DSLASH) {
	if (inv_param.twist_flavor != QUDA_TWIST_MINUS && inv_param.twist_flavor != QUDA_TWIST_PLUS)
	  errorQuda("Twisted mass solution type not supported");
        tm_matpc(spinorTmp, gauge, spinorOut, inv_param.kappa, inv_param.mu, inv_param.twist_flavor, 
                 inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param);
        tm_matpc(spinorCheck, gauge, spinorTmp, inv_param.kappa, inv_param.mu, inv_param.twist_flavor, 
                 inv_param.matpc_type, 1, inv_param.cpu_prec, gauge_param);
      } else if (dslash_type == QUDA_WILSON_DSLASH || dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
        wil_matpc(spinorTmp, gauge, spinorOut, inv_param.kappa, inv_param.matpc_type, 0,
                  inv_param.cpu_prec, gauge_param);
        wil_matpc(spinorCheck, gauge, spinorTmp, inv_param.kappa, inv_param.matpc_type, 1,
                  inv_param.cpu_prec, gauge_param);
      } else {
        printfQuda("Unsupported dslash_type\n");
        exit(-1);
      }

      if (inv_param.mass_normalization == QUDA_MASS_NORMALIZATION) {
	errorQuda("Mass normalization not implemented");
      }

      free(spinorTmp);
    }


    int vol = inv_param.solution_type == QUDA_MAT_SOLUTION ? V : Vh;
    mxpy(spinorIn, spinorCheck, vol*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
    double nrm2 = norm_2(spinorCheck, vol*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
    double src2 = norm_2(spinorIn, vol*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
    double l2r = sqrt(nrm2 / src2);

    printfQuda("Residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g\n",
	       inv_param.tol, inv_param.true_res, l2r, inv_param.tol_hq, inv_param.true_res_hq);

  }

  freeGaugeQuda();
  if (dslash_type == QUDA_CLOVER_WILSON_DSLASH || dslash_type == QUDA_TWISTED_CLOVER_DSLASH) freeCloverQuda();
  
  // finalize the QUDA library
  endQuda();

  finalizeComms();

  return 0;
}