void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity) { ColorSpinorParam cpuParam(h_in, *inv_param, gaugePrecise->X(), 1); ColorSpinorParam cudaParam(cpuParam, *inv_param); cpuColorSpinorField hIn(cpuParam); cudaColorSpinorField in(hIn, cudaParam); cudaParam.create = QUDA_NULL_FIELD_CREATE; cudaColorSpinorField out(in, cudaParam); if (inv_param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) { if (parity == QUDA_EVEN_PARITY) { parity = QUDA_ODD_PARITY; } else { parity = QUDA_EVEN_PARITY; } axCuda(gaugePrecise->Anisotropy(), in); } bool pc = true; DiracParam diracParam; setDiracParam(diracParam, inv_param, pc); Dirac *dirac = Dirac::create(diracParam); // create the Dirac operator dirac->Dslash(out, in, parity); // apply the operator delete dirac; // clean up cpuParam.v = h_out; cpuColorSpinorField hOut(cpuParam); out.saveCPUSpinorField(hOut); // since this is a reference, this won't work: hOut = out; }
void MatDagMatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param) { bool pc = (inv_param->solution_type == QUDA_MATPC_SOLUTION || inv_param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION); ColorSpinorParam cpuParam(h_in, *inv_param, gaugePrecise->X(), pc); ColorSpinorParam cudaParam(cpuParam, *inv_param); cpuColorSpinorField hIn(cpuParam); cudaColorSpinorField in(hIn, cudaParam); cudaParam.create = QUDA_NULL_FIELD_CREATE; cudaColorSpinorField out(in, cudaParam); // double kappa = inv_param->kappa; // if (inv_param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) kappa *= gaugePrecise->anisotropy; DiracParam diracParam; setDiracParam(diracParam, inv_param, pc); Dirac *dirac = Dirac::create(diracParam); // create the Dirac operator dirac->MdagM(out, in); // apply the operator delete dirac; // clean up double kappa = inv_param->kappa; if (pc) { if (inv_param->mass_normalization == QUDA_MASS_NORMALIZATION) { axCuda(1.0/pow(2.0*kappa,4), out); } else if (inv_param->mass_normalization == QUDA_ASYMMETRIC_MASS_NORMALIZATION) { axCuda(0.25/(kappa*kappa), out); } } else { if (inv_param->mass_normalization == QUDA_MASS_NORMALIZATION || inv_param->mass_normalization == QUDA_ASYMMETRIC_MASS_NORMALIZATION) { axCuda(0.25/(kappa*kappa), out); } } cpuParam.v = h_out; cpuColorSpinorField hOut(cpuParam); out.saveCPUSpinorField(hOut); // since this is a reference, this won't work: hOut = out; }
static void massRescale(QudaDslashType dslash_type, double &kappa, QudaSolutionType solution_type, QudaMassNormalization mass_normalization, cudaColorSpinorField &b) { if (dslash_type == QUDA_ASQTAD_DSLASH) { if (mass_normalization != QUDA_MASS_NORMALIZATION) { errorQuda("Staggered code only supports QUDA_MASS_NORMALIZATION"); } return; } // multiply the source to compensate for normalization of the Dirac operator, if necessary switch (solution_type) { case QUDA_MAT_SOLUTION: if (mass_normalization == QUDA_MASS_NORMALIZATION || mass_normalization == QUDA_ASYMMETRIC_MASS_NORMALIZATION) { axCuda(2.0*kappa, b); } break; case QUDA_MATDAG_MAT_SOLUTION: if (mass_normalization == QUDA_MASS_NORMALIZATION || mass_normalization == QUDA_ASYMMETRIC_MASS_NORMALIZATION) { axCuda(4.0*kappa*kappa, b); } break; case QUDA_MATPC_SOLUTION: if (mass_normalization == QUDA_MASS_NORMALIZATION) { axCuda(4.0*kappa*kappa, b); } else if (mass_normalization == QUDA_ASYMMETRIC_MASS_NORMALIZATION) { axCuda(2.0*kappa, b); } break; case QUDA_MATPCDAG_MATPC_SOLUTION: if (mass_normalization == QUDA_MASS_NORMALIZATION) { axCuda(16.0*pow(kappa,4), b); } else if (mass_normalization == QUDA_ASYMMETRIC_MASS_NORMALIZATION) { axCuda(4.0*kappa*kappa, b); } break; default: errorQuda("Solution type %d not supported", solution_type); } if (verbosity >= QUDA_DEBUG_VERBOSE) printfQuda("Mass rescale done\n"); }
// Solve mat * x = b with the three-term-recurrence form of conjugate gradient.
//
//   x  initial guess on entry; the last accepted iterate on exit
//   b  source field
//
// Results are reported through param: true_res is written on every exit
// path, and true_res_hq is additionally set to zero for a zero source.
void CG3::operator()(cudaColorSpinorField &x, cudaColorSpinorField &b)
{
  // Check to see that we're not trying to invert on a zero-field source
  const double b2 = norm2(b);
  if (b2 == 0) {
    // No profile.TPSTOP here: this routine never starts QUDA_PROFILE_INIT,
    // so a stop on this path would be unmatched.
    printfQuda("Warning: inverting on zero-field source\n");
    x = b;
    param.true_res = 0.0;
    param.true_res_hq = 0.0;
    return;
  }

  // Zero-initialised work fields laid out like x.
  ColorSpinorParam csParam(x);
  csParam.create = QUDA_ZERO_FIELD_CREATE;
  cudaColorSpinorField x_prev(b, csParam);
  cudaColorSpinorField r_prev(b, csParam);
  cudaColorSpinorField temp(b, csParam);

  cudaColorSpinorField r(b);
  cudaColorSpinorField w(b);

  mat(r, x, temp);                // r = Mx
  double r2 = xmyNormCuda(b, r);  // r = b - Mx
  PrintStats("CG3", 0, r2, b2, 0.0);

  double stop = stopping(param.tol, b2, param.residual_type);
  if (convergence(r2, 0.0, stop, 0.0)) {
    // The initial guess already satisfies the tolerance. r was just formed
    // explicitly as b - Mx, so r2 is the true residual; report it rather than
    // leaving param.true_res at whatever value it had before the call.
    param.true_res = sqrt(r2 / b2);
    return;
  }

  // First iteration
  mat(w, r, temp);
  double rAr = reDotProductCuda(r, w);
  double rho = 1.0;
  double gamma_prev = 0.0;
  double gamma = r2 / rAr;

  cudaColorSpinorField x_new(x);
  cudaColorSpinorField r_new(r);
  axpyCuda(gamma, r, x_new);   // x_new += gamma*r
  axpyCuda(-gamma, w, r_new);  // r_new -= gamma*w
  // end of first iteration

  int k = 1; // First iteration performed above
  while (!convergence(r2, 0.0, stop, 0.0) && k < param.maxiter) {
    // Accept the candidate computed in the previous pass and keep the
    // iterate before it for the three-term update.
    x_prev = x; x = x_new;
    r_prev = r; r = r_new;

    mat(w, r, temp);
    rAr = reDotProductCuda(r, w);
    const double r2_prev = r2;
    r2 = norm2(r); // Need to rearrange this!
    PrintStats("CG3", k, r2, b2, 0.0);

    gamma_prev = gamma;
    gamma = r2 / rAr;
    rho = 1.0 / (1. - (gamma/gamma_prev)*(r2/r2_prev)*(1.0/rho));

    // x_new = rho*(x + gamma*r) + (1 - rho)*x_prev
    x_new = x;
    axCuda(rho, x_new);
    axpyCuda(rho*gamma, r, x_new);
    axpyCuda((1. - rho), x_prev, x_new);

    // r_new = rho*(r - gamma*w) + (1 - rho)*r_prev
    r_new = r;
    axCuda(rho, r_new);
    axpyCuda(-rho*gamma, w, r_new);
    axpyCuda((1. - rho), r_prev, r_new);

    // NOTE(review): this diagnostic is printed on every iteration regardless
    // of verbosity and costs an extra reduction; consider gating it.
    double rr_old = reDotProductCuda(r_new, r);
    printfQuda("rr_old = %1.14lf\n", rr_old);
    k++;
  }

  if (k == param.maxiter) warningQuda("Exceeded maximum iterations %d", param.maxiter);

  // compute the true residual
  mat(r, x, temp);
  param.true_res = sqrt(xmyNormCuda(b, r) / b2);

  PrintSummary("CG3", k, r2, b2);

  return;
}
void MR::operator()(cudaColorSpinorField &x, cudaColorSpinorField &b) { globalReduce = false; // use local reductions for DD solver if (!init) { ColorSpinorParam csParam(x); csParam.create = QUDA_ZERO_FIELD_CREATE; if (param.preserve_source == QUDA_PRESERVE_SOURCE_YES) { rp = new cudaColorSpinorField(x, csParam); allocate_r = true; } Arp = new cudaColorSpinorField(x); tmpp = new cudaColorSpinorField(x, csParam); //temporary for mat-vec init = true; } cudaColorSpinorField &r = (param.preserve_source == QUDA_PRESERVE_SOURCE_YES) ? *rp : b; cudaColorSpinorField &Ar = *Arp; cudaColorSpinorField &tmp = *tmpp; // set initial guess to zero and thus the residual is just the source zeroCuda(x); // can get rid of this for a special first update kernel double b2 = normCuda(b); if (&r != &b) copyCuda(r, b); // domain-wise normalization of the initial residual to prevent underflow double r2=0.0; // if zero source then we will exit immediately doing no work if (b2 > 0.0) { axCuda(1/sqrt(b2), r); // can merge this with the prior copy r2 = 1.0; // by definition by this is now true } if (param.inv_type_precondition != QUDA_GCR_INVERTER) { quda::blas_flops = 0; profile.TPSTART(QUDA_PROFILE_COMPUTE); } double omega = 1.0; int k = 0; if (getVerbosity() >= QUDA_DEBUG_VERBOSE) { double x2 = norm2(x); double3 Ar3 = cDotProductNormBCuda(Ar, r); printfQuda("MR: %d iterations, r2 = %e, <r|A|r> = (%e, %e), x2 = %e\n", k, Ar3.z, Ar3.x, Ar3.y, x2); } while (k < param.maxiter && r2 > 0.0) { mat(Ar, r, tmp); double3 Ar3 = cDotProductNormACuda(Ar, r); Complex alpha = Complex(Ar3.x, Ar3.y) / Ar3.z; // x += omega*alpha*r, r -= omega*alpha*Ar, r2 = norm2(r) //r2 = caxpyXmazNormXCuda(omega*alpha, r, x, Ar); caxpyXmazCuda(omega*alpha, r, x, Ar); if (getVerbosity() >= QUDA_DEBUG_VERBOSE) { double x2 = norm2(x); double r2 = norm2(r); printfQuda("MR: %d iterations, r2 = %e, <r|A|r> = (%e,%e) x2 = %e\n", k+1, r2, Ar3.x, Ar3.y, x2); } else if (getVerbosity() >= QUDA_VERBOSE) { printfQuda("MR: %d 
iterations, <r|A|r> = (%e, %e)\n", k, Ar3.x, Ar3.y); } k++; } if (getVerbosity() >= QUDA_VERBOSE) { mat(Ar, r, tmp); Complex Ar2 = cDotProductCuda(Ar, r); printfQuda("MR: %d iterations, <r|A|r> = (%e, %e)\n", k, real(Ar2), imag(Ar2)); } // Obtain global solution by rescaling if (b2 > 0.0) axCuda(sqrt(b2), x); if (param.inv_type_precondition != QUDA_GCR_INVERTER) { profile.TPSTOP(QUDA_PROFILE_COMPUTE); profile.TPSTART(QUDA_PROFILE_EPILOGUE); param.secs += profile.Last(QUDA_PROFILE_COMPUTE); double gflops = (quda::blas_flops + mat.flops())*1e-9; reduceDouble(gflops); param.gflops += gflops; param.iter += k; // this is the relative residual since it has been scaled by b2 r2 = norm2(r); if (param.preserve_source == QUDA_PRESERVE_SOURCE_YES) { // Calculate the true residual mat(r, x); double true_res = xmyNormCuda(b, r); param.true_res = sqrt(true_res / b2); if (getVerbosity() >= QUDA_SUMMARIZE) { printfQuda("MR: Converged after %d iterations, relative residua: iterated = %e, true = %e\n", k, sqrt(r2), param.true_res); } } else { if (getVerbosity() >= QUDA_SUMMARIZE) { printfQuda("MR: Converged after %d iterations, relative residua: iterated = %e\n", k, sqrt(r2)); } } // reset the flops counters quda::blas_flops = 0; mat.flops(); profile.TPSTOP(QUDA_PROFILE_EPILOGUE); } globalReduce = true; // renable global reductions for outer solver return; }