Exemplo n.º 1
0
void invertQuda(void *hp_x, void *hp_b, QudaInvertParam *param)
{
  // check the gauge fields have been created
  cudaGaugeField *cudaGauge = checkGauge(param);

  checkInvertParam(param);
  if (param->cuda_prec_sloppy != param->prec_precondition && 
      param->inv_type_precondition != QUDA_INVALID_INVERTER)
    errorQuda("Sorry, cannot yet use different sloppy and preconditioner precisions");

  verbosity = param->verbosity;

  bool pc_solve = (param->solve_type == QUDA_DIRECT_PC_SOLVE ||
		   param->solve_type == QUDA_NORMEQ_PC_SOLVE);

  bool pc_solution = (param->solution_type == QUDA_MATPC_SOLUTION ||
		      param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION);

  param->spinorGiB = cudaGauge->VolumeCB() * spinorSiteSize;
  if (!pc_solve) param->spinorGiB *= 2;
  param->spinorGiB *= (param->cuda_prec == QUDA_DOUBLE_PRECISION ? sizeof(double) : sizeof(float));
  if (param->preserve_source == QUDA_PRESERVE_SOURCE_NO) {
    param->spinorGiB *= (param->inv_type == QUDA_CG_INVERTER ? 5 : 7)/(double)(1<<30);
  } else {
    param->spinorGiB *= (param->inv_type == QUDA_CG_INVERTER ? 8 : 9)/(double)(1<<30);
  }

  param->secs = 0;
  param->gflops = 0;
  param->iter = 0;

  // create the dirac operator
  DiracParam diracParam;
  createDirac(diracParam, *param, pc_solve);
  Dirac &dirac = *d;
  Dirac &diracSloppy = *dSloppy;
  Dirac &diracPre = *dPre;

  cpuColorSpinorField *h_b = NULL;
  cpuColorSpinorField *h_x = NULL;
  cudaColorSpinorField *b = NULL;
  cudaColorSpinorField *x = NULL;
  cudaColorSpinorField *in = NULL;
  cudaColorSpinorField *out = NULL;

  const int *X = cudaGauge->X();

  // wrap CPU host side pointers
  ColorSpinorParam cpuParam(hp_b, *param, X, pc_solution);
  h_b = new cpuColorSpinorField(cpuParam);
  cpuParam.v = hp_x;
  h_x = new cpuColorSpinorField(cpuParam);
    
  // download source
  ColorSpinorParam cudaParam(cpuParam, *param);     
  cudaParam.create = QUDA_COPY_FIELD_CREATE;
  b = new cudaColorSpinorField(*h_b, cudaParam); 

  if (param->use_init_guess == QUDA_USE_INIT_GUESS_YES) { // download initial guess
    x = new cudaColorSpinorField(*h_x, cudaParam); // solution  
  } else { // zero initial guess
    cudaParam.create = QUDA_ZERO_FIELD_CREATE;
    x = new cudaColorSpinorField(cudaParam); // solution
  }
    
  if (param->verbosity >= QUDA_VERBOSE) {
    double nh_b = norm2(*h_b);
    double nb = norm2(*b);
    printfQuda("Source: CPU = %f, CUDA copy = %f\n", nh_b, nb);
  }

  tuneDirac(*param, pc_solution ? *x : x->Even());

  dirac.prepare(in, out, *x, *b, param->solution_type);
  if (param->verbosity >= QUDA_VERBOSE) {
    double nin = norm2(*in);
    printfQuda("Prepared source = %f\n", nin);   
  }

  massRescale(param->dslash_type, diracParam.kappa, param->solution_type, param->mass_normalization, *in);

  switch (param->inv_type) {
  case QUDA_CG_INVERTER:
    if (param->solution_type != QUDA_MATDAG_MAT_SOLUTION && param->solution_type != QUDA_MATPCDAG_MATPC_SOLUTION) {
      copyCuda(*out, *in);
      dirac.Mdag(*in, *out);
    }
    {
      DiracMdagM m(dirac), mSloppy(diracSloppy);
      CG cg(m, mSloppy, *param);
      cg(*out, *in);
    }
    break;
  case QUDA_BICGSTAB_INVERTER:
    if (param->solution_type == QUDA_MATDAG_MAT_SOLUTION || param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION) {
      DiracMdag m(dirac), mSloppy(diracSloppy), mPre(diracPre);
      BiCGstab bicg(m, mSloppy, mPre, *param);
      bicg(*out, *in);
      copyCuda(*in, *out);
    }
    {
      DiracM m(dirac), mSloppy(diracSloppy), mPre(diracPre);
      BiCGstab bicg(m, mSloppy, mPre, *param);
      bicg(*out, *in);
    }
    break;
  case QUDA_GCR_INVERTER:
    if (param->solution_type == QUDA_MATDAG_MAT_SOLUTION || param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION) {
      DiracMdag m(dirac), mSloppy(diracSloppy), mPre(diracPre);
      GCR gcr(m, mSloppy, mPre, *param);
      gcr(*out, *in);
      copyCuda(*in, *out);
    }
    {
      DiracM m(dirac), mSloppy(diracSloppy), mPre(diracPre);
      GCR gcr(m, mSloppy, mPre, *param);
      gcr(*out, *in);
    }
    break;
  default:
    errorQuda("Inverter type %d not implemented", param->inv_type);
  }
  
  if (param->verbosity >= QUDA_VERBOSE){
   double nx = norm2(*x);
   printfQuda("Solution = %f\n",nx);
  }
  dirac.reconstruct(*x, *b, param->solution_type);
  
  x->saveCPUSpinorField(*h_x); // since this is a reference, this won't work: h_x = x;
  
  if (param->verbosity >= QUDA_VERBOSE){
    double nx = norm2(*x);
    double nh_x = norm2(*h_x);
    printfQuda("Reconstructed: CUDA solution = %f, CPU copy = %f\n", nx, nh_x);
  }
  
  if (!param->preserve_dirac) {
    delete d;
    delete dSloppy;
    delete dPre;
    diracCreation = false;
    diracTune = false;
  }  

  delete h_b;
  delete h_x;
  delete b;
  delete x;
  
  return;
}
Exemplo n.º 2
0
/* Copy src into dst, whose capacity is dstSize bytes including the terminator.
 * Returns 0 on success and 1 when src is NULL or does not fit; dst is always
 * left NUL-terminated when it has room for at least one byte. */
static int copyBounded(char *dst, size_t dstSize, const char *src)
{
   size_t len;

   if(dst==NULL || dstSize==0) return 1;
   dst[0] = '\0';
   if(src==NULL) return 1;
   len = strlen(src);
   if(len>=dstSize) return 1;
   memcpy(dst, src, len+1);
   return 0;
}

/* Print msg (when not NULL) to stderr, flush the standard streams and
 * terminate every process of MPI_COMM_WORLD. Used instead of exit() so that
 * a failure on one rank cannot leave the others blocked in a collective. */
static void abortAll(const char *msg)
{
   if(msg!=NULL) fprintf(stderr,"%s\n",msg);
   fflush(stdout);
   fflush(stderr);
   MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
   exit(EXIT_FAILURE);           // only reached if MPI_Abort returns
}

/* Driver: reads an input file (argv[1]) on rank 0 and broadcasts it, loads a
 * gauge field, then for each of the 4 x 3 (mu, col) source vectors calls gcr
 * and keeps solving for a correction on the true residue until the relative
 * residue drops to maxRes or maxIter rounds have been done.
 * Returns EXIT_SUCCESS; every failure path terminates the whole MPI job. */
int main(int argc,char* argv[])
{
   qcd_uint_2 mu,col;            // loop variables over the 12 source vectors

   int failed;                   // local error flag; plain int because it is sent as MPI_INT
   int failedGlobal;             // sum of the error flags over all ranks
   int params_len = 0;           // needed to read inputfiles
   char *params = NULL;          // needed to read inputfiles
   const char *value;            // text of the parameter currently being parsed


   qcd_uint_4 Nrestart;                         // restart GCR every Nrestart iterations
   char gauge_name[qcd_MAX_STRING_LENGTH];      // name of gauge configuration
   char param_name[qcd_MAX_STRING_LENGTH];      // name of parameter file
   char sol_name[qcd_MAX_STRING_LENGTH];        // name of solution file
   char src_name[qcd_MAX_STRING_LENGTH];        // name of source file
   char src_type[qcd_MAX_STRING_LENGTH];        // source type
   char sol_type[qcd_MAX_STRING_LENGTH];        // solution type
   qcd_real_8 kappa;                            // hopping parameter
   qcd_real_8 muTM;                             // twisted mass parameter
   qcd_real_8 normsrc,normres;                  // norm of source, norm of residue
   qcd_uint_4 maxIter = 10000;
   qcd_uint_4 iter;
   qcd_real_8 maxRes = 1e-8;

   qcd_geometry geo;                            // geometry structure

   qcd_real_8 theta[4] = {M_PI,0.0,0.0,0.0};    // antiperiodic b.c. in time
   qcd_uint_2 L[4];
   qcd_uint_2 P[4];

   qcd_vector src;
   qcd_vector sol;
   qcd_vector res;
   qcd_vector correction;
   qcd_gaugeField u;


   int myid,numprocs, namelen;
   char processor_name[MPI_MAX_PROCESSOR_NAME];


   //set up MPI
   MPI_Init(&argc, &argv);
   MPI_Comm_size(MPI_COMM_WORLD,&numprocs);         // num. of processes taking part in the calculation
   MPI_Comm_rank(MPI_COMM_WORLD,&myid);             // each process gets its ID
   MPI_Get_processor_name(processor_name,&namelen); //


   //////////////////// READ INPUT FILE /////////////////////////////////////////////

   if(argc!=2)
      abortAll(myid==0 ? "No input file specified" : NULL);

   // argv[1] is user input: refuse names that do not fit instead of overflowing
   if(copyBounded(param_name,sizeof(param_name),argv[1]))
      abortAll(myid==0 ? "Input file name is too long" : NULL);

   failed = 0;
   if(myid==0)
   {
      printf("opening input file %s\n",param_name);
      params=qcd_getParams(param_name,&params_len);
      if(params==NULL)
      {
         failed = 1;
      }
   }
   // every rank learns whether rank 0 could read the file, so all leave together
   MPI_Bcast(&failed,1,MPI_INT, 0, MPI_COMM_WORLD);
   if(failed)
   {
      MPI_Finalize();
      return EXIT_FAILURE;
   }
   MPI_Bcast(&params_len, 1, MPI_INT, 0, MPI_COMM_WORLD);
   if(params_len<=0)
      abortAll(myid==0 ? "Input file is empty" : NULL);
   if(myid!=0)
   {
      params = (char*) malloc(params_len*sizeof(char));
      if(params==NULL)
         abortAll("not enough memory for the input parameters");
   }
   MPI_Bcast(params, params_len, MPI_CHAR, 0, MPI_COMM_WORLD);

   // qcd_uint_2 is presumably an unsigned 16-bit type, hence %hu -- TODO confirm
   value = qcd_getParam("<processors_txyz>",params,params_len);
   if(value==NULL || sscanf(value,"%hu %hu %hu %hu",&P[0], &P[1], &P[2], &P[3])!=4)
      abortAll(myid==0 ? "Could not read <processors_txyz>" : NULL);
   value = qcd_getParam("<lattice_txyz>",params,params_len);
   if(value==NULL || sscanf(value,"%hu %hu %hu %hu",&L[0], &L[1], &L[2], &L[3])!=4)
      abortAll(myid==0 ? "Could not read <lattice_txyz>" : NULL);
   if(qcd_initGeometry(&geo,L,P, theta, myid, numprocs)) abortAll(NULL);

   if(myid==0) printf(" Local lattice: %i x %i x %i x %i\n",geo.lL[0],geo.lL[1],geo.lL[2],geo.lL[3]);

   if(copyBounded(src_type,sizeof(src_type),qcd_getParam("<source_type>",params,params_len)))
      abortAll(myid==0 ? "Parameter <source_type> is missing or too long" : NULL);
   if(myid==0) printf("Got source type: %s\n",src_type);

   /* src_type == "HMC_PROPAGATOR", propagator with 12 vectors */
   if(copyBounded(src_name,sizeof(src_name),qcd_getParam("<source>",params,params_len)))
      abortAll(myid==0 ? "Parameter <source> is missing or too long" : NULL);
   if(myid==0) printf("Got source file name: %s\n",src_name);

   if(copyBounded(sol_type,sizeof(sol_type),qcd_getParam("<solution_type>",params,params_len)))
      abortAll(myid==0 ? "Parameter <solution_type> is missing or too long" : NULL);
   if(myid==0) printf("Got solution type: %s\n",sol_type);

   if(copyBounded(sol_name,sizeof(sol_name),qcd_getParam("<solution>",params,params_len)))
      abortAll(myid==0 ? "Parameter <solution> is missing or too long" : NULL);
   if(myid==0) printf("Got solution file name: %s\n",sol_name);

   if(copyBounded(gauge_name,sizeof(gauge_name),qcd_getParam("<cfg_name>",params,params_len)))
      abortAll(myid==0 ? "Parameter <cfg_name> is missing or too long" : NULL);
   if(myid==0) printf("Got conf name: %s\n",gauge_name);

   value = qcd_getParam("<N_restart>",params,params_len);
   if(value==NULL || sscanf(value,"%u",&Nrestart)!=1)
      abortAll(myid==0 ? "Could not read <N_restart>" : NULL);
   if(myid==0) printf("Got N_restart: %u\n",Nrestart);

   value = qcd_getParam("<kappa>",params,params_len);
   if(value==NULL || sscanf(value,"%lf",&kappa)!=1)
      abortAll(myid==0 ? "Could not read <kappa>" : NULL);
   if(myid==0) printf("Got kappa: %e\n",kappa);

   value = qcd_getParam("<mu>",params,params_len);
   if(value==NULL || sscanf(value,"%lf",&muTM)!=1)
      abortAll(myid==0 ? "Could not read <mu>" : NULL);
   if(myid==0) printf("Got mu: %e\n",muTM);

   free(params);
   params = NULL;


   //#####################################################################
   // allocate memory

   /* src_type == HMC */
   failed = 0;
   failed += qcd_initVector(&src, &geo);
   failed += qcd_initVector(&sol, &geo);
   failed += qcd_initVector(&res, &geo);
   failed += qcd_initVector(&correction, &geo);
   failed += qcd_initGaugeField(&u, &geo);

   // an allocation failure on any rank stops the whole job
   MPI_Allreduce(&failed, &failedGlobal, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
   if(failedGlobal>0)
   {
      if(myid==0) printf("not enough memory\n");
      abortAll(NULL);
   }
   if(myid==0) printf("memory for propagators and gauge-field allocated\n");


   //##############################################################################
   // load gauge-field
   if(qcd_getGaugeField(gauge_name,qcd_GF_LIME,&u)) abortAll(NULL);
   if(myid==0) printf("gauge-field loaded\n");


   for(mu=0; mu<4; mu++)
   for(col=0; col<3; col++)
   {
      if(myid==0) printf("------------ vector: mu = %hi,  col = %hi ------------\n",mu,col);

      iter = 1;
      if(qcd_getVector(src_name,qcd_PROP_HMC, mu, col, &src)) abortAll(NULL);
      if(myid==0) printf("vector from %s loaded\n",src_name);

      normsrc = qcd_normVector(&src);
      if(myid==0) printf("Norm of source: %e\n",normsrc);

      gcr(&sol, &src, &u, kappa, muTM, Nrestart, &geo);

      /* calculate true residue */
      qcd_applyQTMOp(&res, &sol, &u, 1.0/(2.0*kappa)-4.0 ,muTM);

      qcd_subVector(&res, &src, &res);
      normres = qcd_normVector(&res);
      if(myid==0) printf("True norm of residue: %e\n",normres);
      normres /= normsrc;
      if(myid==0) printf("Relative residue: %e\n",normres);

      /* iterative improvement until precision reached */
      while(normres>maxRes && iter < maxIter)
      {
         /* solve D correction = residue */
         /* and set solution <- solution + correction */
         gcr(&correction, &res, &u, kappa, muTM, Nrestart, &geo);
         qcd_addVector(&sol,&sol,&correction);

         /* calculate true residue */
         qcd_applyQTMOp(&res, &sol, &u, 1.0/(2.0*kappa)-4.0 ,muTM);
         qcd_subVector(&res, &src, &res);
         normres = qcd_normVector(&res)/normsrc;
         if(myid==0) printf("True relative residue: %e\n",normres);
         iter++;
      }
      // the loop also ends when maxIter is reached, so report which case applies
      if(normres>maxRes)
      {
         if(myid==0) printf("NOT converged after %u x %u iterations (relative residue %e).\n\n",iter,Nrestart,normres);
      }
      else
      {
         if(myid==0) printf("Converged after %u x %u iterations.\n\n",iter,Nrestart);
      }
   }


   //#####################################################################
   // clean up
   if(myid==0) printf("cleaning up...\n");
   qcd_destroyVector(&src);
   qcd_destroyVector(&sol);
   qcd_destroyVector(&res);
   qcd_destroyVector(&correction);
   qcd_destroyGaugeField(&u);
   qcd_destroyGeometry(&geo);
   MPI_Finalize();
   return EXIT_SUCCESS;
}//end main