Exemple #1
0
int main(int argc, char *argv[])
{

  FILE *parameterfile = NULL;
  int c, j;
  char * filename = NULL;
  char datafilename[50];
  char parameterfilename[50];
  char conf_filename[50];
  char * input_filename = NULL;
  char * xlfmessage = NULL;
  char * gaugelfn = NULL;
  char * gaugecksum = NULL;
  double plaquette_energy;

#ifdef _KOJAK_INST
#pragma pomp inst init
#pragma pomp inst begin(main)
#endif

#ifdef HAVE_LIBLEMON
  MPI_File fh;
  LemonWriter *lemonWriter;
  paramsXlfInfo *xlfInfo;
  paramsPropagatorFormat *propagatorFormat;
#endif

#if (defined SSE || defined SSE2 || SSE3)
  signal(SIGILL, &catch_ill_inst);
#endif

  DUM_DERI = 6;
  /* DUM_DERI + 2 is enough (not 7) */
  DUM_SOLVER = DUM_DERI + 3;
  DUM_MATRIX = DUM_SOLVER + 8;
  /* DUM_MATRIX + 2 is enough (not 6) */
  NO_OF_SPINORFIELDS = DUM_MATRIX + 2;

  verbose = 0;
  g_use_clover_flag = 0;

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  while ((c = getopt(argc, argv, "h?f:o:")) != -1) {
    switch (c) {
      case 'f':
        input_filename = calloc(200, sizeof(char));
        strcpy(input_filename, optarg);
        break;
      case 'o':
        filename = calloc(200, sizeof(char));
        strcpy(filename, optarg);
        break;
      case 'h':
      case '?':
      default:
        usage();
        break;
    }
  }
  if (input_filename == NULL) {
    input_filename = "hmc.input";
  }
  if (filename == NULL) {
    filename = "output";
  }

  /* Read the input file */
  read_input(input_filename);
  if (solver_flag == 12 && even_odd_flag == 1) {
    even_odd_flag = 0;
    if (g_proc_id == 0) {
      fprintf(stderr, "CGMMS works only without even/odd! Forcing!\n");
    }
  }

  /* this DBW2 stuff is not needed for the inversion ! */
  if (g_dflgcr_flag == 1) {
    even_odd_flag = 0;
  }
  g_rgi_C1 = 0;
  if (Nsave == 0) {
    Nsave = 1;
  }

  if(g_running_phmc) {
    NO_OF_SPINORFIELDS = DUM_MATRIX + 8;
  }

  mpi_init(argc, argv);

  g_dbw2rand = 0;

  /* starts the single and double precision random number */
  /* generator                                            */
  start_ranlux(rlxd_level, random_seed);

#ifndef MPI
  g_dbw2rand = 0;
#endif

#ifdef _GAUGE_COPY
  j = init_gauge_field(VOLUMEPLUSRAND, 1);
#else
  j = init_gauge_field(VOLUMEPLUSRAND, 0);
#endif
  if(j != 0) {
    fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n");
    exit(-1);
  }
  j = init_geometry_indices(VOLUMEPLUSRAND);
  if(j != 0) {
    fprintf(stderr, "Not enough memory for geometry indices! Aborting...\n");
    exit(-1);
  }
  if(no_monomials > 0) {
    if(even_odd_flag) {
      j = init_monomials(VOLUMEPLUSRAND / 2, even_odd_flag);
    }
    else {
      j = init_monomials(VOLUMEPLUSRAND, even_odd_flag);
    }
    if(j != 0) {
      fprintf(stderr, "Not enough memory for monomial pseudo fermion  fields! Aborting...\n");
      exit(0);
    }
  }
  if(even_odd_flag) {
    j = init_spinor_field(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS);
  }
  else {
    j = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS);
  }
  if(j != 0) {
    fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n");
    exit(-1);
  }

  if(g_running_phmc) {
    j = init_chi_up_spinor_field(VOLUMEPLUSRAND / 2, 20);
    if(j != 0) {
      fprintf(stderr, "Not enough memory for PHMC Chi_up fields! Aborting...\n");
      exit(0);
    }
    j = init_chi_dn_spinor_field(VOLUMEPLUSRAND / 2, 20);
    if(j != 0) {
      fprintf(stderr, "Not enough memory for PHMC Chi_dn fields! Aborting...\n");
      exit(0);
    }
  }

  g_mu = g_mu1;
  if(g_proc_id == 0) {
    /*construct the filenames for the observables and the parameters*/
    strcpy(datafilename, filename);
    strcat(datafilename, ".data");
    strcpy(parameterfilename, filename);
    strcat(parameterfilename, ".para");

    parameterfile = fopen(parameterfilename, "w");
    write_first_messages(parameterfile, 1);
    fclose(parameterfile);
  }

  /* this is for the extra masses of the CGMMS */
  if (solver_flag == 12 && g_no_extra_masses > 0) {
    if ((parameterfile = fopen("extra_masses.input", "r")) != NULL) {
      for (j = 0; j < g_no_extra_masses; j++) {
        fscanf(parameterfile, "%lf", &g_extra_masses[j]);
        if (g_proc_id == 0 && g_debug_level > 0) {
          printf("# g_extra_masses[%d] = %lf\n", j, g_extra_masses[j]);
        }
      }
      fclose(parameterfile);
    }
    else {
      fprintf(stderr, "Could not open file extra_masses.input!\n");
      g_no_extra_masses = 0;
    }
  }

  /* define the geometry */
  geometry();

  /* define the boundary conditions for the fermion fields */
  boundary(g_kappa);

  phmc_invmaxev = 1.;


#ifdef _USE_HALFSPINOR
  j = init_dirac_halfspinor();
  if (j != 0) {
    fprintf(stderr, "Not enough memory for halffield! Aborting...\n");
    exit(-1);
  }
  if (g_sloppy_precision_flag == 1) {
    j = init_dirac_halfspinor32();
    if (j != 0) {
      fprintf(stderr, "Not enough memory for 32-Bit halffield! Aborting...\n");
      exit(-1);
    }
  }
#  if (defined _PERSISTENT)
  if (even_odd_flag) {
    init_xchange_halffield();
  }
#  endif
#endif

  for (j = 0; j < Nmeas; j++) {
    sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore);
    if (g_proc_id == 0) {
      printf("Reading Gauge field from file %s\n", conf_filename);
      fflush(stdout);
    }
#ifdef HAVE_LIBLEMON
    read_lemon_gauge_field_parallel(conf_filename, &gaugecksum, &xlfmessage, &gaugelfn);
#else /* HAVE_LIBLEMON */
    if (xlfmessage != (char*)NULL)
      free(xlfmessage);
    if (gaugelfn != (char*)NULL)
      free(gaugelfn);
    if (gaugecksum != (char*)NULL)
      free(gaugecksum);
    read_lime_gauge_field(conf_filename);
    xlfmessage = read_message(conf_filename, "xlf-info");
    gaugelfn = read_message(conf_filename, "ildg-data-lfn");
    gaugecksum = read_message(conf_filename, "scidac-checksum");
    printf("%s \n", gaugecksum);
#endif /* HAVE_LIBLEMON */
    if (g_proc_id == 0) {
      printf("done!\n");
      fflush(stdout);
    }
    /*     unit_g_gauge_field(); */
#ifdef MPI
    xchange_gauge(g_gauge_field);
#endif

    /*compute the energy of the gauge field*/
    plaquette_energy = measure_gauge_action();

    if (g_proc_id == 0) {
      printf("The plaquette value is %e\n", plaquette_energy / (6.*VOLUME*g_nproc));
      fflush(stdout);
    }

    if (use_stout_flag == 1) {
      if (stout_smear_gauge_field(stout_rho , stout_no_iter) != 0) {
        exit(1) ;
      }
      plaquette_energy = measure_gauge_action();

      if (g_proc_id == 0) {
        printf("The plaquette value after stouting is %e\n", plaquette_energy / (6.*VOLUME*g_nproc));
        fflush(stdout);
      }
    }

	/* Compute minimal eigenvalues, necessary for overlap! */
	if (compute_evs != 0)
		eigenvalues(&no_eigenvalues, max_solver_iterations, eigenvalue_precision, 0, compute_evs, nstore, even_odd_flag);
	else {
		compute_evs = 1;
		no_eigenvalues = 1;
		eigenvalues(&no_eigenvalues, max_solver_iterations, eigenvalue_precision, 0, compute_evs, nstore, even_odd_flag);
		no_eigenvalues = 0;
		compute_evs = 0;
	}

	if (phmc_compute_evs != 0) {
#ifdef MPI
		MPI_Finalize();
#endif
		return (0);
	}

	/* here we can do something */
	ov_n_cheby = (-log(delta))/(2*sqrt(ev_minev));
	printf("// Degree of cheby polynomial: %d\n", ov_n_cheby);
//    g_mu = 0.;
	ov_check_locality();
//	ov_check_ginsparg_wilson_relation_strong();
//	ov_compare_4x4("overlap.mat");
//	ov_compare_12x12("overlap.mat");
//	ov_save_12x12("overlap.mat");
//	ov_check_operator(1,0,0,0);

    nstore += Nsave;
  }
#ifdef MPI
  MPI_Finalize();
#endif

  free_blocks();
  free_dfl_subspace();
  free_gauge_field();
  free_geometry_indices();
  free_spinor_field();
  free_moment_field();
  if (g_running_phmc) {
    free_chi_up_spinor_field();
    free_chi_dn_spinor_field();
  }
  return(0);
#ifdef _KOJAK_INST
#pragma pomp inst end(main)
#endif
}
Exemple #2
0
int main(int argc, char *argv[])
{
  FILE *parameterfile = NULL;
  int c, j, i, ix = 0, isample = 0, op_id = 0;
  char * filename = NULL;
  char datafilename[50];
  char parameterfilename[50];
  char conf_filename[50];
  char * input_filename = NULL;
  double plaquette_energy;
  struct stout_parameters params_smear;
  spinor **s, *s_;

#ifdef _KOJAK_INST
#pragma pomp inst init
#pragma pomp inst begin(main)
#endif
  

#if (defined SSE || defined SSE2 || SSE3)
  signal(SIGILL, &catch_ill_inst);
#endif

  DUM_DERI = 8;
  DUM_MATRIX = DUM_DERI + 5;
#if ((defined BGL && defined XLC) || defined _USE_TSPLITPAR)
  NO_OF_SPINORFIELDS = DUM_MATRIX + 3;
#else
  NO_OF_SPINORFIELDS = DUM_MATRIX + 3;
#endif

  verbose = 0;
  g_use_clover_flag = 0;

#ifdef MPI

#  ifdef OMP
  int mpi_thread_provided;
  MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_thread_provided);
#  else
  MPI_Init(&argc, &argv);
#  endif

  MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id);
#else
  g_proc_id = 0;
#endif

  while ((c = getopt(argc, argv, "h?vVf:o:")) != -1) {
    switch (c) {
      case 'f':
        input_filename = calloc(200, sizeof(char));
        strcpy(input_filename, optarg);
        break;
      case 'o':
        filename = calloc(200, sizeof(char));
        strcpy(filename, optarg);
        break;
      case 'v':
        verbose = 1;
        break;
      case 'V':
        fprintf(stdout,"%s %s\n",PACKAGE_STRING,git_hash);
        exit(0);
        break;
      case 'h':
      case '?':
      default:
        usage();
        break;
    }
  }
  if (input_filename == NULL) {
    input_filename = "invert.input";
  }
  if (filename == NULL) {
    filename = "output";
  }

  /* Read the input file */
  if( (j = read_input(input_filename)) != 0) {
    fprintf(stderr, "Could not find input file: %s\nAborting...\n", input_filename);
    exit(-1);
  }

#ifdef OMP
  if(omp_num_threads > 0) 
  {
     omp_set_num_threads(omp_num_threads);
  }
  else {
    if( g_proc_id == 0 )
      printf("# No value provided for OmpNumThreads, running in single-threaded mode!\n");

    omp_num_threads = 1;
    omp_set_num_threads(omp_num_threads);
  }

  init_omp_accumulators(omp_num_threads);
#endif

  /* this DBW2 stuff is not needed for the inversion ! */
  if (g_dflgcr_flag == 1) {
    even_odd_flag = 0;
  }
  g_rgi_C1 = 0;
  if (Nsave == 0) {
    Nsave = 1;
  }

  if (g_running_phmc) {
    NO_OF_SPINORFIELDS = DUM_MATRIX + 8;
  }

  tmlqcd_mpi_init(argc, argv);

  g_dbw2rand = 0;

  /* starts the single and double precision random number */
  /* generator                                            */
  start_ranlux(rlxd_level, random_seed);

  /* we need to make sure that we don't have even_odd_flag = 1 */
  /* if any of the operators doesn't use it                    */
  /* in this way even/odd can still be used by other operators */
  for(j = 0; j < no_operators; j++) if(!operator_list[j].even_odd_flag) even_odd_flag = 0;

#ifndef MPI
  g_dbw2rand = 0;
#endif

#ifdef _GAUGE_COPY
  j = init_gauge_field(VOLUMEPLUSRAND, 1);
#else
  j = init_gauge_field(VOLUMEPLUSRAND, 0);
#endif
  if (j != 0) {
    fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n");
    exit(-1);
  }
  j = init_geometry_indices(VOLUMEPLUSRAND);
  if (j != 0) {
    fprintf(stderr, "Not enough memory for geometry indices! Aborting...\n");
    exit(-1);
  }
  if (no_monomials > 0) {
    if (even_odd_flag) {
      j = init_monomials(VOLUMEPLUSRAND / 2, even_odd_flag);
    }
    else {
      j = init_monomials(VOLUMEPLUSRAND, even_odd_flag);
    }
    if (j != 0) {
      fprintf(stderr, "Not enough memory for monomial pseudo fermion fields! Aborting...\n");
      exit(-1);
    }
  }
  if (even_odd_flag) {
    j = init_spinor_field(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS);
  }
  else {
    j = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS);
  }
  if (j != 0) {
    fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n");
    exit(-1);
  }

  if (g_running_phmc) {
    j = init_chi_spinor_field(VOLUMEPLUSRAND / 2, 20);
    if (j != 0) {
      fprintf(stderr, "Not enough memory for PHMC Chi fields! Aborting...\n");
      exit(-1);
    }
  }

  g_mu = g_mu1;
  if (g_cart_id == 0) {
    /*construct the filenames for the observables and the parameters*/
    strcpy(datafilename, filename);
    strcat(datafilename, ".data");
    strcpy(parameterfilename, filename);
    strcat(parameterfilename, ".para");

    parameterfile = fopen(parameterfilename, "w");
    write_first_messages(parameterfile, 1);
    fclose(parameterfile);
  }

  /* define the geometry */
  geometry();

  /* define the boundary conditions for the fermion fields */
  boundary(g_kappa);

  phmc_invmaxev = 1.;

  init_operators();

  /* this could be maybe moved to init_operators */
#ifdef _USE_HALFSPINOR
  j = init_dirac_halfspinor();
  if (j != 0) {
    fprintf(stderr, "Not enough memory for halffield! Aborting...\n");
    exit(-1);
  }
  if (g_sloppy_precision_flag == 1) {
    j = init_dirac_halfspinor32();
    if (j != 0)
    {
      fprintf(stderr, "Not enough memory for 32-bit halffield! Aborting...\n");
      exit(-1);
    }
  }
#  if (defined _PERSISTENT)
  if (even_odd_flag)
    init_xchange_halffield();
#  endif
#endif

  for (j = 0; j < Nmeas; j++) {
    sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore);
    if (g_cart_id == 0) {
      printf("#\n# Trying to read gauge field from file %s in %s precision.\n",
            conf_filename, (gauge_precision_read_flag == 32 ? "single" : "double"));
      fflush(stdout);
    }
    if( (i = read_gauge_field(conf_filename)) !=0) {
      fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", i, conf_filename);
      exit(-2);
    }


    if (g_cart_id == 0) {
      printf("# Finished reading gauge field.\n");
      fflush(stdout);
    }
#ifdef MPI
    xchange_gauge(g_gauge_field);
#endif

    /*compute the energy of the gauge field*/
    plaquette_energy = measure_gauge_action( (const su3**) g_gauge_field);

    if (g_cart_id == 0) {
      printf("# The computed plaquette value is %e.\n", plaquette_energy / (6.*VOLUME*g_nproc));
      fflush(stdout);
    }

    if (use_stout_flag == 1){
      params_smear.rho = stout_rho;
      params_smear.iterations = stout_no_iter;
/*       if (stout_smear((su3_tuple*)(g_gauge_field[0]), &params_smear, (su3_tuple*)(g_gauge_field[0])) != 0) */
/*         exit(1) ; */
      g_update_gauge_copy = 1;
      g_update_gauge_energy = 1;
      g_update_rectangle_energy = 1;
      plaquette_energy = measure_gauge_action( (const su3**) g_gauge_field);

      if (g_cart_id == 0) {
        printf("# The plaquette value after stouting is %e\n", plaquette_energy / (6.*VOLUME*g_nproc));
        fflush(stdout);
      }
    }

    if (reweighting_flag == 1) {
      reweighting_factor(reweighting_samples, nstore);
    }

    /* Compute minimal eigenvalues, if wanted */
    if (compute_evs != 0) {
      eigenvalues(&no_eigenvalues, 5000, eigenvalue_precision,
                  0, compute_evs, nstore, even_odd_flag);
    }
    if (phmc_compute_evs != 0) {
#ifdef MPI
      MPI_Finalize();
#endif
      return(0);
    }

    /* Compute the mode number or topological susceptibility using spectral projectors, if wanted*/

    if(compute_modenumber != 0 || compute_topsus !=0){
      
      s_ = calloc(no_sources_z2*VOLUMEPLUSRAND+1, sizeof(spinor));
      s  = calloc(no_sources_z2, sizeof(spinor*));
      if(s_ == NULL) { 
	printf("Not enough memory in %s: %d",__FILE__,__LINE__); exit(42); 
      }
      if(s == NULL) { 
	printf("Not enough memory in %s: %d",__FILE__,__LINE__); exit(42); 
      }
      
      
      for(i = 0; i < no_sources_z2; i++) {
#if (defined SSE3 || defined SSE2 || defined SSE)
        s[i] = (spinor*)(((unsigned long int)(s_)+ALIGN_BASE)&~ALIGN_BASE)+i*VOLUMEPLUSRAND;
#else
        s[i] = s_+i*VOLUMEPLUSRAND;
#endif
	
        z2_random_spinor_field(s[i], VOLUME);
	
/* 	what is this here needed for?? */
/*         spinor *aux_,*aux; */
/* #if ( defined SSE || defined SSE2 || defined SSE3 ) */
/*         aux_=calloc(VOLUMEPLUSRAND+1, sizeof(spinor)); */
/*         aux = (spinor *)(((unsigned long int)(aux_)+ALIGN_BASE)&~ALIGN_BASE); */
/* #else */
/*         aux_=calloc(VOLUMEPLUSRAND, sizeof(spinor)); */
/*         aux = aux_; */
/* #endif */
	
        if(g_proc_id == 0) {
          printf("source %d \n", i);
        }
	
        if(compute_modenumber != 0){
          mode_number(s[i], mstarsq);
        }
	
        if(compute_topsus !=0) {
          top_sus(s[i], mstarsq);
        }
      }
      free(s);
      free(s_);
    }


    /* move to operators as well */
    if (g_dflgcr_flag == 1) {
      /* set up deflation blocks */
      init_blocks(nblocks_t, nblocks_x, nblocks_y, nblocks_z);

      /* the can stay here for now, but later we probably need */
      /* something like init_dfl_solver called somewhere else  */
      /* create set of approximate lowest eigenvectors ("global deflation subspace") */

      /*       g_mu = 0.; */
      /*       boundary(0.125); */
      generate_dfl_subspace(g_N_s, VOLUME);
      /*       boundary(g_kappa); */
      /*       g_mu = g_mu1; */

      /* Compute little Dirac operators */
      /*       alt_block_compute_little_D(); */
      if (g_debug_level > 0) {
        check_projectors();
        check_local_D();
      }
      if (g_debug_level > 1) {
        check_little_D_inversion();
      }

    }
    if(SourceInfo.type == 1) {
      index_start = 0;
      index_end = 1;
    }

    g_precWS=NULL;
    if(use_preconditioning == 1){
      /* todo load fftw wisdom */
#if (defined HAVE_FFTW ) && !( defined MPI)
      loadFFTWWisdom(g_spinor_field[0],g_spinor_field[1],T,LX);
#else
      use_preconditioning=0;
#endif
    }

    if (g_cart_id == 0) {
      fprintf(stdout, "#\n"); /*Indicate starting of the operator part*/
    }
    for(op_id = 0; op_id < no_operators; op_id++) {
      boundary(operator_list[op_id].kappa);
      g_kappa = operator_list[op_id].kappa; 
      g_mu = 0.;

      if(use_preconditioning==1 && PRECWSOPERATORSELECT[operator_list[op_id].solver]!=PRECWS_NO ){
        printf("# Using preconditioning with treelevel preconditioning operator: %s \n",
              precWSOpToString(PRECWSOPERATORSELECT[operator_list[op_id].solver]));
        /* initial preconditioning workspace */
        operator_list[op_id].precWS=(spinorPrecWS*)malloc(sizeof(spinorPrecWS));
        spinorPrecWS_Init(operator_list[op_id].precWS,
                  operator_list[op_id].kappa,
                  operator_list[op_id].mu/2./operator_list[op_id].kappa,
                  -(0.5/operator_list[op_id].kappa-4.),
                  PRECWSOPERATORSELECT[operator_list[op_id].solver]);
        g_precWS = operator_list[op_id].precWS;

        if(PRECWSOPERATORSELECT[operator_list[op_id].solver] == PRECWS_D_DAGGER_D) {
          fitPrecParams(op_id);
        }
      }

      for(isample = 0; isample < no_samples; isample++) {
        for (ix = index_start; ix < index_end; ix++) {
          if (g_cart_id == 0) {
            fprintf(stdout, "#\n"); /*Indicate starting of new index*/
          }
          /* we use g_spinor_field[0-7] for sources and props for the moment */
          /* 0-3 in case of 1 flavour  */
          /* 0-7 in case of 2 flavours */
          prepare_source(nstore, isample, ix, op_id, read_source_flag, source_location);
          operator_list[op_id].inverter(op_id, index_start);
        }
      }


      if(use_preconditioning==1 && operator_list[op_id].precWS!=NULL ){
        /* free preconditioning workspace */
        spinorPrecWS_Free(operator_list[op_id].precWS);
        free(operator_list[op_id].precWS);
      }

      if(operator_list[op_id].type == OVERLAP){
        free_Dov_WS();
      }

    }
    nstore += Nsave;
  }

#ifdef MPI
  MPI_Finalize();
#endif
#ifdef OMP
  free_omp_accumulators();
#endif
  free_blocks();
  free_dfl_subspace();
  free_gauge_field();
  free_geometry_indices();
  free_spinor_field();
  free_moment_field();
  free_chi_spinor_field();
  return(0);
#ifdef _KOJAK_INST
#pragma pomp inst end(main)
#endif
}
Exemple #3
0
int main(int argc, char *argv[])
{
  FILE *parameterfile = NULL;
  int j, i, ix = 0, isample = 0, op_id = 0;
  char datafilename[206];
  char parameterfilename[206];
  char conf_filename[50];
  char * input_filename = NULL;
  char * filename = NULL;
  double plaquette_energy;
  struct stout_parameters params_smear;

#ifdef _KOJAK_INST
#pragma pomp inst init
#pragma pomp inst begin(main)
#endif

#if (defined SSE || defined SSE2 || SSE3)
  signal(SIGILL, &catch_ill_inst);
#endif

  DUM_DERI = 8;
  DUM_MATRIX = DUM_DERI + 5;
  NO_OF_SPINORFIELDS = DUM_MATRIX + 4;

  //4 extra fields (corresponding to DUM_MATRIX+0..5) for deg. and ND matrix mult.  
  NO_OF_SPINORFIELDS_32 = 6;

  verbose = 0;
  g_use_clover_flag = 0;


  process_args(argc,argv,&input_filename,&filename);
  set_default_filenames(&input_filename, &filename);

  init_parallel_and_read_input(argc, argv, input_filename);

  /* this DBW2 stuff is not needed for the inversion ! */
  if (g_dflgcr_flag == 1) {
    even_odd_flag = 0;
  }
  g_rgi_C1 = 0;
  if (Nsave == 0) {
    Nsave = 1;
  }

  if (g_running_phmc) {
    NO_OF_SPINORFIELDS = DUM_MATRIX + 8;
  }

  tmlqcd_mpi_init(argc, argv);

  g_dbw2rand = 0;

  /* starts the single and double precision random number */
  /* generator                                            */
  start_ranlux(rlxd_level, random_seed^nstore);

  /* we need to make sure that we don't have even_odd_flag = 1 */
  /* if any of the operators doesn't use it                    */
  /* in this way even/odd can still be used by other operators */
  for(j = 0; j < no_operators; j++) if(!operator_list[j].even_odd_flag) even_odd_flag = 0;

#ifndef TM_USE_MPI
  g_dbw2rand = 0;
#endif

#ifdef _GAUGE_COPY
  j = init_gauge_field(VOLUMEPLUSRAND, 1);
  j += init_gauge_field_32(VOLUMEPLUSRAND, 1);
#else
  j = init_gauge_field(VOLUMEPLUSRAND, 0);
  j += init_gauge_field_32(VOLUMEPLUSRAND, 0);  
#endif
 
  if (j != 0) {
    fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n");
    exit(-1);
  }
  j = init_geometry_indices(VOLUMEPLUSRAND);
  if (j != 0) {
    fprintf(stderr, "Not enough memory for geometry indices! Aborting...\n");
    exit(-1);
  }
  if (no_monomials > 0) {
    if (even_odd_flag) {
      j = init_monomials(VOLUMEPLUSRAND / 2, even_odd_flag);
    }
    else {
      j = init_monomials(VOLUMEPLUSRAND, even_odd_flag);
    }
    if (j != 0) {
      fprintf(stderr, "Not enough memory for monomial pseudo fermion fields! Aborting...\n");
      exit(-1);
    }
  }
  if (even_odd_flag) {
    j = init_spinor_field(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS);
    j += init_spinor_field_32(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS_32);   
  }
  else {
    j = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS);
    j += init_spinor_field_32(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS_32);   
  }
  if (j != 0) {
    fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n");
    exit(-1);
  }

  if (g_running_phmc) {
    j = init_chi_spinor_field(VOLUMEPLUSRAND / 2, 20);
    if (j != 0) {
      fprintf(stderr, "Not enough memory for PHMC Chi fields! Aborting...\n");
      exit(-1);
    }
  }

  g_mu = g_mu1;

  if (g_cart_id == 0) {
    /*construct the filenames for the observables and the parameters*/
    strncpy(datafilename, filename, 200);
    strcat(datafilename, ".data");
    strncpy(parameterfilename, filename, 200);
    strcat(parameterfilename, ".para");

    parameterfile = fopen(parameterfilename, "w");
    write_first_messages(parameterfile, "invert", git_hash);
    fclose(parameterfile);
  }

  /* define the geometry */
  geometry();

  /* define the boundary conditions for the fermion fields */
  boundary(g_kappa);

  phmc_invmaxev = 1.;

  init_operators();

  /* list and initialize measurements*/
  if(g_proc_id == 0) {
    printf("\n");
    for(int j = 0; j < no_measurements; j++) {
      printf("# measurement id %d, type = %d\n", j, measurement_list[j].type);
    }
  }
  init_measurements();  

  /* this could be maybe moved to init_operators */
#ifdef _USE_HALFSPINOR
  j = init_dirac_halfspinor();
  if (j != 0) {
    fprintf(stderr, "Not enough memory for halffield! Aborting...\n");
    exit(-1);
  }
  /* for mixed precision solvers, the 32 bit halfspinor field must always be there */
  j = init_dirac_halfspinor32();
  if (j != 0)
  {
    fprintf(stderr, "Not enough memory for 32-bit halffield! Aborting...\n");
    exit(-1);
  }
#  if (defined _PERSISTENT)
  if (even_odd_flag)
    init_xchange_halffield();
#  endif
#endif

  for (j = 0; j < Nmeas; j++) {
    sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore);
    if (g_cart_id == 0) {
      printf("#\n# Trying to read gauge field from file %s in %s precision.\n",
            conf_filename, (gauge_precision_read_flag == 32 ? "single" : "double"));
      fflush(stdout);
    }
    if( (i = read_gauge_field(conf_filename,g_gauge_field)) !=0) {
      fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", i, conf_filename);
      exit(-2);
    }


    if (g_cart_id == 0) {
      printf("# Finished reading gauge field.\n");
      fflush(stdout);
    }
#ifdef TM_USE_MPI
    xchange_gauge(g_gauge_field);
#endif
    /*Convert to a 32 bit gauge field, after xchange*/
    convert_32_gauge_field(g_gauge_field_32, g_gauge_field, VOLUMEPLUSRAND);
    /*compute the energy of the gauge field*/
    plaquette_energy = measure_plaquette( (const su3**) g_gauge_field);

    if (g_cart_id == 0) {
      printf("# The computed plaquette value is %e.\n", plaquette_energy / (6.*VOLUME*g_nproc));
      fflush(stdout);
    }

    if (use_stout_flag == 1){
      params_smear.rho = stout_rho;
      params_smear.iterations = stout_no_iter;
/*       if (stout_smear((su3_tuple*)(g_gauge_field[0]), &params_smear, (su3_tuple*)(g_gauge_field[0])) != 0) */
/*         exit(1) ; */
      g_update_gauge_copy = 1;
      plaquette_energy = measure_plaquette( (const su3**) g_gauge_field);

      if (g_cart_id == 0) {
        printf("# The plaquette value after stouting is %e\n", plaquette_energy / (6.*VOLUME*g_nproc));
        fflush(stdout);
      }
    }

    /* if any measurements are defined in the input file, do them here */
    measurement * meas;
    for(int imeas = 0; imeas < no_measurements; imeas++){
      meas = &measurement_list[imeas];
      if (g_proc_id == 0) {
        fprintf(stdout, "#\n# Beginning online measurement.\n");
      }
      meas->measurefunc(nstore, imeas, even_odd_flag);
    }

    if (reweighting_flag == 1) {
      reweighting_factor(reweighting_samples, nstore);
    }

    /* Compute minimal eigenvalues, if wanted */
    if (compute_evs != 0) {
      eigenvalues(&no_eigenvalues, 5000, eigenvalue_precision,
                  0, compute_evs, nstore, even_odd_flag);
    }
    if (phmc_compute_evs != 0) {
#ifdef TM_USE_MPI
      MPI_Finalize();
#endif
      return(0);
    }

    /* Compute the mode number or topological susceptibility using spectral projectors, if wanted*/
    if(compute_modenumber != 0 || compute_topsus !=0){
      invert_compute_modenumber(); 
    }

    //  set up blocks if Deflation is used 
    if (g_dflgcr_flag) 
      init_blocks(nblocks_t, nblocks_x, nblocks_y, nblocks_z);
    
    if(SourceInfo.type == SRC_TYPE_VOL || SourceInfo.type == SRC_TYPE_PION_TS || SourceInfo.type == SRC_TYPE_GEN_PION_TS) {
      index_start = 0;
      index_end = 1;
    }

    g_precWS=NULL;
    if(use_preconditioning == 1){
      /* todo load fftw wisdom */
#if (defined HAVE_FFTW ) && !( defined TM_USE_MPI)
      loadFFTWWisdom(g_spinor_field[0],g_spinor_field[1],T,LX);
#else
      use_preconditioning=0;
#endif
    }

    if (g_cart_id == 0) {
      fprintf(stdout, "#\n"); /*Indicate starting of the operator part*/
    }
    for(op_id = 0; op_id < no_operators; op_id++) {
      boundary(operator_list[op_id].kappa);
      g_kappa = operator_list[op_id].kappa; 
      g_mu = operator_list[op_id].mu;
      g_c_sw = operator_list[op_id].c_sw;
      // DFLGCR and DFLFGMRES
      if(operator_list[op_id].solver == DFLGCR || operator_list[op_id].solver == DFLFGMRES) {
        generate_dfl_subspace(g_N_s, VOLUME, reproduce_randomnumber_flag);
      }

      if(use_preconditioning==1 && PRECWSOPERATORSELECT[operator_list[op_id].solver]!=PRECWS_NO ){
        printf("# Using preconditioning with treelevel preconditioning operator: %s \n",
              precWSOpToString(PRECWSOPERATORSELECT[operator_list[op_id].solver]));
        /* initial preconditioning workspace */
        operator_list[op_id].precWS=(spinorPrecWS*)malloc(sizeof(spinorPrecWS));
        spinorPrecWS_Init(operator_list[op_id].precWS,
                  operator_list[op_id].kappa,
                  operator_list[op_id].mu/2./operator_list[op_id].kappa,
                  -(0.5/operator_list[op_id].kappa-4.),
                  PRECWSOPERATORSELECT[operator_list[op_id].solver]);
        g_precWS = operator_list[op_id].precWS;

        if(PRECWSOPERATORSELECT[operator_list[op_id].solver] == PRECWS_D_DAGGER_D) {
          fitPrecParams(op_id);
        }
      }

      for(isample = 0; isample < no_samples; isample++) {
        for (ix = index_start; ix < index_end; ix++) {
          if (g_cart_id == 0) {
            fprintf(stdout, "#\n"); /*Indicate starting of new index*/
          }
          /* we use g_spinor_field[0-7] for sources and props for the moment */
          /* 0-3 in case of 1 flavour  */
          /* 0-7 in case of 2 flavours */
          prepare_source(nstore, isample, ix, op_id, read_source_flag, source_location, random_seed);
          //randmize initial guess for eigcg if needed-----experimental
          if( (operator_list[op_id].solver == INCREIGCG) && (operator_list[op_id].solver_params.eigcg_rand_guess_opt) ){ //randomize the initial guess
              gaussian_volume_source( operator_list[op_id].prop0, operator_list[op_id].prop1,isample,ix,0); //need to check this
          } 
          operator_list[op_id].inverter(op_id, index_start, 1);
        }
      }


      if(use_preconditioning==1 && operator_list[op_id].precWS!=NULL ){
        /* free preconditioning workspace */
        spinorPrecWS_Free(operator_list[op_id].precWS);
        free(operator_list[op_id].precWS);
      }

      if(operator_list[op_id].type == OVERLAP){
        free_Dov_WS();
      }

    }
    nstore += Nsave;
  }

#ifdef TM_USE_OMP
  free_omp_accumulators();
#endif
  free_blocks();
  free_dfl_subspace();
  free_gauge_field();
  free_gauge_field_32();
  free_geometry_indices();
  free_spinor_field();
  free_spinor_field_32();  
  free_moment_field();
  free_chi_spinor_field();
  free(filename);
  free(input_filename);
  free(SourceInfo.basename);
  free(PropInfo.basename);
#ifdef TM_USE_QUDA
  _endQuda();
#endif
#ifdef TM_USE_MPI
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Finalize();
#endif
  return(0);
#ifdef _KOJAK_INST
#pragma pomp inst end(main)
#endif
}