/*
 * Acceptance-step energy difference for the monomial with index `id`.
 *
 * Temporarily switches the globals g_mu and the boundary condition (via
 * boundary()) to the monomial's own mu/kappa, solves against the
 * pseudo-fermion field mnl->pf with the solver branch selected by
 * even_odd_flag and solver, stores the squared norm of the result in
 * energy1, and restores g_mu, the boundary condition and ITER_MAX_BCG.
 *
 * id : index into monomial_list
 * hf : not referenced by this routine
 *
 * Returns energy1 - energy0 of the monomial.
 */
double det_acc(const int id, hamiltonian_field_t * const hf) {
  monomial * const mono = &monomial_list[id];
  const int iter_max_saved = ITER_MAX_BCG;
  const int sloppy_saved = g_sloppy_precision_flag;

  /* switch the global operator parameters to those of this monomial */
  g_mu = mono->mu;
  boundary(mono->kappa);

  if(mono->even_odd_flag) {
    /* even/odd branch: fields of length VOLUME/2 */
    if(mono->solver == CG) {
      ITER_MAX_BCG = 0;
    }
    chrono_guess(g_spinor_field[2], mono->pf,
                 mono->csg_field, mono->csg_index_array,
                 mono->csg_N, mono->csg_n, VOLUME/2, &Qtm_plus_psi);

    /* the sloppy-precision flag is cleared only around this solve */
    g_sloppy_precision_flag = 0;
    mono->iter0 = bicg(g_spinor_field[2], mono->pf,
                       mono->accprec, g_relative_precision_flag);
    g_sloppy_precision_flag = sloppy_saved;

    /* energy contribution from the first field */
    mono->energy1 = square_norm(g_spinor_field[2], VOLUME/2, 1);
  }
  else if(mono->solver == CG) {
    /* full-volume branch, CG: solve with Q_pm_psi, then apply Q_minus_psi */
    /* NOTE(review): the guess is built with VOLUME/2 while the solve uses
       VOLUME -- confirm this is intended */
    chrono_guess(g_spinor_field[DUM_DERI+5], mono->pf,
                 mono->csg_field, mono->csg_index_array,
                 mono->csg_N, mono->csg_n, VOLUME/2, &Q_pm_psi);
    mono->iter0 = cg_her(g_spinor_field[DUM_DERI+5], mono->pf,
                         mono->maxiter, mono->accprec,
                         g_relative_precision_flag, VOLUME, Q_pm_psi);
    Q_minus_psi(g_spinor_field[2], g_spinor_field[DUM_DERI+5]);

    /* energy contribution from the first field */
    mono->energy1 = square_norm(g_spinor_field[2], VOLUME, 1);
  }
  else {
    /* full-volume branch, any other solver: bicgstab_complex on Q_plus_psi */
    /* NOTE(review): this branch accumulates into iter0 (+=) and uses
       forceprec, whereas the other two branches assign iter0 and use
       accprec; the guess also uses VOLUME/2 -- confirm all three */
    chrono_guess(g_spinor_field[2], mono->pf,
                 mono->csg_field, mono->csg_index_array,
                 mono->csg_N, mono->csg_n, VOLUME/2, &Q_plus_psi);
    mono->iter0 += bicgstab_complex(g_spinor_field[2], mono->pf,
                                    mono->maxiter, mono->forceprec,
                                    g_relative_precision_flag, VOLUME, Q_plus_psi);
    mono->energy1 = square_norm(g_spinor_field[2], VOLUME, 1);
  }

  /* back to the global operator parameters */
  g_mu = g_mu1;
  boundary(g_kappa);

  if(g_debug_level > 3 && g_proc_id == 0) {
    printf("called det_acc for id %d %d dH = %1.4e\n",
           id, mono->even_odd_flag, mono->energy1 - mono->energy0);
  }

  ITER_MAX_BCG = iter_max_saved;
  return(mono->energy1 - mono->energy0);
}
//============================================================================= int main(int argc, char** argv) { srand(time(NULL)); const int size(100); CPPL::dgematrix A(size,size); //CPPL::dgsmatrix A(size,size); for(int i=0; i<size; i++){ for(int j=0; j<size; j++){ if(rand()%2){ A(i,j) =(double(rand())/double(RAND_MAX))*2.0 -1.0; } } A(i,i)+=10.; } A.write("A.dgematrix"); CPPL::dcovector x(size); for(int i=0; i<size; i++){ x(i) =(double(rand())/double(RAND_MAX))*1. -0.5; } x.write("answer.dcovector");//solution std::cerr << "answer=\n" << t(x) << std::endl; CPPL::dcovector y(A*x); y.write("y.dcovector"); //std::cerr << "y=\n" << t(y) << std::endl; double eps(fabs(damax(y))*1e-6); //std::cerr << "eps=" << eps << std::endl; if( bicg(A, y, eps) ){ std::cerr << "failed." << std::endl; exit(1); } y.write("solution.dcovector"); std::cout << "solution=\n" << t(y) << std::endl; //std::cerr << "A*x=\n" << t(A*y) << std::endl; return 0; }
void invertQuda(void *hp_x, void *hp_b, QudaInvertParam *param) { // check the gauge fields have been created cudaGaugeField *cudaGauge = checkGauge(param); checkInvertParam(param); if (param->cuda_prec_sloppy != param->prec_precondition && param->inv_type_precondition != QUDA_INVALID_INVERTER) errorQuda("Sorry, cannot yet use different sloppy and preconditioner precisions"); verbosity = param->verbosity; bool pc_solve = (param->solve_type == QUDA_DIRECT_PC_SOLVE || param->solve_type == QUDA_NORMEQ_PC_SOLVE); bool pc_solution = (param->solution_type == QUDA_MATPC_SOLUTION || param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION); param->spinorGiB = cudaGauge->VolumeCB() * spinorSiteSize; if (!pc_solve) param->spinorGiB *= 2; param->spinorGiB *= (param->cuda_prec == QUDA_DOUBLE_PRECISION ? sizeof(double) : sizeof(float)); if (param->preserve_source == QUDA_PRESERVE_SOURCE_NO) { param->spinorGiB *= (param->inv_type == QUDA_CG_INVERTER ? 5 : 7)/(double)(1<<30); } else { param->spinorGiB *= (param->inv_type == QUDA_CG_INVERTER ? 
8 : 9)/(double)(1<<30); } param->secs = 0; param->gflops = 0; param->iter = 0; // create the dirac operator DiracParam diracParam; createDirac(diracParam, *param, pc_solve); Dirac &dirac = *d; Dirac &diracSloppy = *dSloppy; Dirac &diracPre = *dPre; cpuColorSpinorField *h_b = NULL; cpuColorSpinorField *h_x = NULL; cudaColorSpinorField *b = NULL; cudaColorSpinorField *x = NULL; cudaColorSpinorField *in = NULL; cudaColorSpinorField *out = NULL; const int *X = cudaGauge->X(); // wrap CPU host side pointers ColorSpinorParam cpuParam(hp_b, *param, X, pc_solution); h_b = new cpuColorSpinorField(cpuParam); cpuParam.v = hp_x; h_x = new cpuColorSpinorField(cpuParam); // download source ColorSpinorParam cudaParam(cpuParam, *param); cudaParam.create = QUDA_COPY_FIELD_CREATE; b = new cudaColorSpinorField(*h_b, cudaParam); if (param->use_init_guess == QUDA_USE_INIT_GUESS_YES) { // download initial guess x = new cudaColorSpinorField(*h_x, cudaParam); // solution } else { // zero initial guess cudaParam.create = QUDA_ZERO_FIELD_CREATE; x = new cudaColorSpinorField(cudaParam); // solution } if (param->verbosity >= QUDA_VERBOSE) { double nh_b = norm2(*h_b); double nb = norm2(*b); printfQuda("Source: CPU = %f, CUDA copy = %f\n", nh_b, nb); } tuneDirac(*param, pc_solution ? 
*x : x->Even()); dirac.prepare(in, out, *x, *b, param->solution_type); if (param->verbosity >= QUDA_VERBOSE) { double nin = norm2(*in); printfQuda("Prepared source = %f\n", nin); } massRescale(param->dslash_type, diracParam.kappa, param->solution_type, param->mass_normalization, *in); switch (param->inv_type) { case QUDA_CG_INVERTER: if (param->solution_type != QUDA_MATDAG_MAT_SOLUTION && param->solution_type != QUDA_MATPCDAG_MATPC_SOLUTION) { copyCuda(*out, *in); dirac.Mdag(*in, *out); } { DiracMdagM m(dirac), mSloppy(diracSloppy); CG cg(m, mSloppy, *param); cg(*out, *in); } break; case QUDA_BICGSTAB_INVERTER: if (param->solution_type == QUDA_MATDAG_MAT_SOLUTION || param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION) { DiracMdag m(dirac), mSloppy(diracSloppy), mPre(diracPre); BiCGstab bicg(m, mSloppy, mPre, *param); bicg(*out, *in); copyCuda(*in, *out); } { DiracM m(dirac), mSloppy(diracSloppy), mPre(diracPre); BiCGstab bicg(m, mSloppy, mPre, *param); bicg(*out, *in); } break; case QUDA_GCR_INVERTER: if (param->solution_type == QUDA_MATDAG_MAT_SOLUTION || param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION) { DiracMdag m(dirac), mSloppy(diracSloppy), mPre(diracPre); GCR gcr(m, mSloppy, mPre, *param); gcr(*out, *in); copyCuda(*in, *out); } { DiracM m(dirac), mSloppy(diracSloppy), mPre(diracPre); GCR gcr(m, mSloppy, mPre, *param); gcr(*out, *in); } break; default: errorQuda("Inverter type %d not implemented", param->inv_type); } if (param->verbosity >= QUDA_VERBOSE){ double nx = norm2(*x); printfQuda("Solution = %f\n",nx); } dirac.reconstruct(*x, *b, param->solution_type); x->saveCPUSpinorField(*h_x); // since this is a reference, this won't work: h_x = x; if (param->verbosity >= QUDA_VERBOSE){ double nx = norm2(*x); double nh_x = norm2(*h_x); printfQuda("Reconstructed: CUDA solution = %f, CPU copy = %f\n", nx, nh_x); } if (!param->preserve_dirac) { delete d; delete dSloppy; delete dPre; diracCreation = false; diracTune = false; } delete h_b; delete h_x; delete b; 
delete x; return; }