void invertQuda(void *hp_x, void *hp_b, QudaInvertParam *param) { // check the gauge fields have been created cudaGaugeField *cudaGauge = checkGauge(param); checkInvertParam(param); if (param->cuda_prec_sloppy != param->prec_precondition && param->inv_type_precondition != QUDA_INVALID_INVERTER) errorQuda("Sorry, cannot yet use different sloppy and preconditioner precisions"); verbosity = param->verbosity; bool pc_solve = (param->solve_type == QUDA_DIRECT_PC_SOLVE || param->solve_type == QUDA_NORMEQ_PC_SOLVE); bool pc_solution = (param->solution_type == QUDA_MATPC_SOLUTION || param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION); param->spinorGiB = cudaGauge->VolumeCB() * spinorSiteSize; if (!pc_solve) param->spinorGiB *= 2; param->spinorGiB *= (param->cuda_prec == QUDA_DOUBLE_PRECISION ? sizeof(double) : sizeof(float)); if (param->preserve_source == QUDA_PRESERVE_SOURCE_NO) { param->spinorGiB *= (param->inv_type == QUDA_CG_INVERTER ? 5 : 7)/(double)(1<<30); } else { param->spinorGiB *= (param->inv_type == QUDA_CG_INVERTER ? 8 : 9)/(double)(1<<30); } param->secs = 0; param->gflops = 0; param->iter = 0; // create the dirac operator DiracParam diracParam; createDirac(diracParam, *param, pc_solve); Dirac &dirac = *d; Dirac &diracSloppy = *dSloppy; Dirac &diracPre = *dPre; cpuColorSpinorField *h_b = NULL; cpuColorSpinorField *h_x = NULL; cudaColorSpinorField *b = NULL; cudaColorSpinorField *x = NULL; cudaColorSpinorField *in = NULL; cudaColorSpinorField *out = NULL; const int *X = cudaGauge->X(); // wrap CPU host side pointers ColorSpinorParam cpuParam(hp_b, *param, X, pc_solution); h_b = new cpuColorSpinorField(cpuParam); cpuParam.v = hp_x; h_x = new cpuColorSpinorField(cpuParam); // download source ColorSpinorParam cudaParam(cpuParam, *param); cudaParam.create = QUDA_COPY_FIELD_CREATE; b = new cudaColorSpinorField(*h_b, cudaParam); if (param->use_init_guess == QUDA_USE_INIT_GUESS_YES) { // download initial guess x = new cudaColorSpinorField(*h_x, cudaParam); // solution } else { // zero initial guess cudaParam.create = QUDA_ZERO_FIELD_CREATE; x = new cudaColorSpinorField(cudaParam); // solution } if (param->verbosity >= QUDA_VERBOSE) { double nh_b = norm2(*h_b); double nb = norm2(*b); printfQuda("Source: CPU = %f, CUDA copy = %f\n", nh_b, nb); } tuneDirac(*param, pc_solution ? *x : x->Even()); dirac.prepare(in, out, *x, *b, param->solution_type); if (param->verbosity >= QUDA_VERBOSE) { double nin = norm2(*in); printfQuda("Prepared source = %f\n", nin); } massRescale(param->dslash_type, diracParam.kappa, param->solution_type, param->mass_normalization, *in); switch (param->inv_type) { case QUDA_CG_INVERTER: if (param->solution_type != QUDA_MATDAG_MAT_SOLUTION && param->solution_type != QUDA_MATPCDAG_MATPC_SOLUTION) { copyCuda(*out, *in); dirac.Mdag(*in, *out); } { DiracMdagM m(dirac), mSloppy(diracSloppy); CG cg(m, mSloppy, *param); cg(*out, *in); } break; case QUDA_BICGSTAB_INVERTER: if (param->solution_type == QUDA_MATDAG_MAT_SOLUTION || param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION) { DiracMdag m(dirac), mSloppy(diracSloppy), mPre(diracPre); BiCGstab bicg(m, mSloppy, mPre, *param); bicg(*out, *in); copyCuda(*in, *out); } { DiracM m(dirac), mSloppy(diracSloppy), mPre(diracPre); BiCGstab bicg(m, mSloppy, mPre, *param); bicg(*out, *in); } break; case QUDA_GCR_INVERTER: if (param->solution_type == QUDA_MATDAG_MAT_SOLUTION || param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION) { DiracMdag m(dirac), mSloppy(diracSloppy), mPre(diracPre); GCR gcr(m, mSloppy, mPre, *param); gcr(*out, *in); copyCuda(*in, *out); } { DiracM m(dirac), mSloppy(diracSloppy), mPre(diracPre); GCR gcr(m, mSloppy, mPre, *param); gcr(*out, *in); } break; default: errorQuda("Inverter type %d not implemented", param->inv_type); } if (param->verbosity >= QUDA_VERBOSE){ double nx = norm2(*x); printfQuda("Solution = %f\n",nx); } dirac.reconstruct(*x, *b, param->solution_type); x->saveCPUSpinorField(*h_x); // since this is a reference, this won't work: h_x = x; if (param->verbosity >= QUDA_VERBOSE){ double nx = norm2(*x); double nh_x = norm2(*h_x); printfQuda("Reconstructed: CUDA solution = %f, CPU copy = %f\n", nx, nh_x); } if (!param->preserve_dirac) { delete d; delete dSloppy; delete dPre; diracCreation = false; diracTune = false; } delete h_b; delete h_x; delete b; delete x; return; }
int main(int argc,char* argv[]) { qcd_int_4 i,j,k; // various loop variables qcd_uint_2 mu,col; int params_len; // needed to read inputfiles char *params = NULL; // needed to read inputfiles qcd_uint_4 Nrestart; // restart GCR every Nrestart iterations char gauge_name[qcd_MAX_STRING_LENGTH]; // name of gauge configuration char param_name[qcd_MAX_STRING_LENGTH]; // name of parameter file char sol_name[qcd_MAX_STRING_LENGTH]; // name of solution file char src_name[qcd_MAX_STRING_LENGTH]; // name of source file char src_type[qcd_MAX_STRING_LENGTH]; // source type char sol_type[qcd_MAX_STRING_LENGTH]; // solution type qcd_real_8 kappa; // hopping parameter qcd_real_8 muTM; // twisted mass parameter qcd_real_8 normsrc,normres; // norm of source, norm of residue qcd_uint_4 maxIter = 10000; qcd_uint_4 iter; qcd_real_8 maxRes = 1e-8; qcd_geometry geo; // geometry structure qcd_real_8 theta[4] = {M_PI,0.0,0.0,0.0}; // antiperiodic b.c. in time qcd_uint_2 L[4]; qcd_uint_2 P[4]; qcd_vector src; qcd_vector sol; qcd_vector res; qcd_vector correction; qcd_gaugeField u; int myid,numprocs, namelen; char processor_name[MPI_MAX_PROCESSOR_NAME]; //set up MPI MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD,&numprocs); // num. of processes taking part in the calculation MPI_Comm_rank(MPI_COMM_WORLD,&myid); // each process gets its ID MPI_Get_processor_name(processor_name,&namelen); // //////////////////// READ INPUT FILE ///////////////////////////////////////////// if(argc!=2) { if(myid==0) fprintf(stderr,"No input file specified\n"); exit(EXIT_FAILURE); } strcpy(param_name,argv[1]); if(myid==0) { i=0; printf("opening input file %s\n",param_name); params=qcd_getParams(param_name,¶ms_len); if(params==NULL) { i=1; } } MPI_Bcast(&i,1,MPI_INT, 0, MPI_COMM_WORLD); if(i==1) exit(EXIT_FAILURE); MPI_Bcast(¶ms_len, 1, MPI_INT, 0, MPI_COMM_WORLD); if(myid!=0) params = (char*) malloc(params_len*sizeof(char)); MPI_Bcast(params, params_len, MPI_CHAR, 0, MPI_COMM_WORLD); sscanf(qcd_getParam("<processors_txyz>",params,params_len),"%hd %hd %hd %hd",&P[0], &P[1], &P[2], &P[3]); sscanf(qcd_getParam("<lattice_txyz>",params,params_len),"%hd %hd %hd %hd",&L[0], &L[1], &L[2], &L[3]); if(qcd_initGeometry(&geo,L,P, theta, myid, numprocs)) exit(EXIT_FAILURE); if(myid==0) printf(" Local lattice: %i x %i x %i x %i\n",geo.lL[0],geo.lL[1],geo.lL[2],geo.lL[3]); strcpy(src_type,qcd_getParam("<source_type>",params,params_len)); if(myid==0) printf("Got source type: %s\n",src_type); /* src_type == "HMC_PROPAGATOR", propagator with 12 vectors */ strcpy(src_name,qcd_getParam("<source>",params,params_len)); if(myid==0) printf("Got source file name: %s\n",src_name); strcpy(sol_type,qcd_getParam("<solution_type>",params,params_len)); if(myid==0) printf("Got solution type: %s\n",sol_type); strcpy(sol_name,qcd_getParam("<solution>",params,params_len)); if(myid==0) printf("Got solution file name: %s\n",sol_name); strcpy(gauge_name,qcd_getParam("<cfg_name>",params,params_len)); if(myid==0) printf("Got conf name: %s\n",gauge_name); sscanf(qcd_getParam("<N_restart>",params,params_len),"%u",&Nrestart); if(myid==0) printf("Got N_restart: %u\n",Nrestart); sscanf(qcd_getParam("<kappa>",params,params_len),"%lf",&kappa); if(myid==0) printf("Got kappa: %e\n",kappa); sscanf(qcd_getParam("<mu>",params,params_len),"%lf",&muTM); if(myid==0) printf("Got mu: %e\n",muTM); free(params); //##################################################################### // allocate memory /* src_type == HMC */ j = 0; j += qcd_initVector(&src, &geo); j += qcd_initVector(&sol, &geo); j += qcd_initVector(&res, &geo); j += qcd_initVector(&correction, &geo); j += qcd_initGaugeField(&u, &geo); MPI_Allreduce(&j, &k, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); if(k>0) { if(myid==0) printf("not enough memory\n"); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } if(myid==0) printf("memory for propagators and gauge-field allocated\n"); //############################################################################## // load gauge-field if(qcd_getGaugeField(gauge_name,qcd_GF_LIME,&u)) exit(EXIT_FAILURE); if(myid==0) printf("gauge-field loaded\n"); for(mu=0; mu<4; mu++) for(col=0; col<3; col++) { if(myid==0) printf("------------ vector: mu = %hi, col = %hi ------------\n",mu,col); iter = 1; if(qcd_getVector(src_name,qcd_PROP_HMC, mu, col, &src)) exit(EXIT_FAILURE); if(myid==0) printf("vector from %s loaded\n",src_name); normsrc = qcd_normVector(&src); if(myid==0) printf("Norm of source: %e\n",normsrc); gcr(&sol, &src, &u, kappa, muTM, Nrestart, &geo); /* calculate true residue */ qcd_applyQTMOp(&res, &sol, &u, 1.0/(2.0*kappa)-4.0 ,muTM); qcd_subVector(&res, &src, &res); normres = qcd_normVector(&res); if(myid==0) printf("True norm of residue: %e\n",normres); normres /= normsrc; if(myid==0) printf("Relative residue: %e\n",normres); /* iterative improvement until precision reached */ while(normres>maxRes && iter < maxIter) { /* solve D correction = residue */ /* and set solution <- solution - correction */ gcr(&correction, &res, &u, kappa, muTM, Nrestart, &geo); qcd_addVector(&sol,&sol,&correction); /* calculate true residue */ qcd_applyQTMOp(&res, &sol, &u, 1.0/(2.0*kappa)-4.0 ,muTM); qcd_subVector(&res, &src, &res); normres = qcd_normVector(&res)/normsrc; if(myid==0) printf("True relative residue: %e\n",normres); iter++; } if(myid==0) printf("Converged after %i x %i iterations.\n\n",iter,Nrestart); } //##################################################################### // clean up if(myid==0) printf("cleaning up...\n"); qcd_destroyVector(&src); qcd_destroyVector(&sol); qcd_destroyVector(&res); qcd_destroyVector(&correction); qcd_destroyGaugeField(&u); qcd_destroyGeometry(&geo); MPI_Finalize(); }//end main