void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity) { ColorSpinorParam cpuParam(h_in, *inv_param, gaugePrecise->X(), 1); ColorSpinorParam cudaParam(cpuParam, *inv_param); cpuColorSpinorField hIn(cpuParam); cudaColorSpinorField in(hIn, cudaParam); cudaParam.create = QUDA_NULL_FIELD_CREATE; cudaColorSpinorField out(in, cudaParam); if (inv_param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) { if (parity == QUDA_EVEN_PARITY) { parity = QUDA_ODD_PARITY; } else { parity = QUDA_EVEN_PARITY; } axCuda(gaugePrecise->Anisotropy(), in); } bool pc = true; DiracParam diracParam; setDiracParam(diracParam, inv_param, pc); Dirac *dirac = Dirac::create(diracParam); // create the Dirac operator dirac->Dslash(out, in, parity); // apply the operator delete dirac; // clean up cpuParam.v = h_out; cpuColorSpinorField hOut(cpuParam); out.saveCPUSpinorField(hOut); // since this is a reference, this won't work: hOut = out; }
void createDirac(DiracParam &diracParam, QudaInvertParam ¶m, bool pc_solve) { if (!diracCreation) { setDiracParam(diracParam, ¶m, pc_solve); d = Dirac::create(diracParam); // create the Dirac operator setDiracSloppyParam(diracParam, ¶m, pc_solve); dSloppy = Dirac::create(diracParam); setDiracPreParam(diracParam, ¶m, pc_solve); dPre = Dirac::create(diracParam); diracCreation = true; } }
// The preconditioner currently mimicks the sloppy operator with no comms void setDiracPreParam(DiracParam &diracParam, QudaInvertParam *inv_param, const bool pc) { setDiracParam(diracParam, inv_param, pc); diracParam.gauge = gaugeSloppy; diracParam.fatGauge = gaugeFatSloppy; diracParam.longGauge = gaugeLongSloppy; diracParam.clover = cloverSloppy; for (int i=0; i<4; i++) { diracParam.commDim[i] = 0; // comms are always off } }
void MatDagMatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param) { bool pc = (inv_param->solution_type == QUDA_MATPC_SOLUTION || inv_param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION); ColorSpinorParam cpuParam(h_in, *inv_param, gaugePrecise->X(), pc); ColorSpinorParam cudaParam(cpuParam, *inv_param); cpuColorSpinorField hIn(cpuParam); cudaColorSpinorField in(hIn, cudaParam); cudaParam.create = QUDA_NULL_FIELD_CREATE; cudaColorSpinorField out(in, cudaParam); // double kappa = inv_param->kappa; // if (inv_param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) kappa *= gaugePrecise->anisotropy; DiracParam diracParam; setDiracParam(diracParam, inv_param, pc); Dirac *dirac = Dirac::create(diracParam); // create the Dirac operator dirac->MdagM(out, in); // apply the operator delete dirac; // clean up double kappa = inv_param->kappa; if (pc) { if (inv_param->mass_normalization == QUDA_MASS_NORMALIZATION) { axCuda(1.0/pow(2.0*kappa,4), out); } else if (inv_param->mass_normalization == QUDA_ASYMMETRIC_MASS_NORMALIZATION) { axCuda(0.25/(kappa*kappa), out); } } else { if (inv_param->mass_normalization == QUDA_MASS_NORMALIZATION || inv_param->mass_normalization == QUDA_ASYMMETRIC_MASS_NORMALIZATION) { axCuda(0.25/(kappa*kappa), out); } } cpuParam.v = h_out; cpuColorSpinorField hOut(cpuParam); out.saveCPUSpinorField(hOut); // since this is a reference, this won't work: hOut = out; }
void init() { gauge_param = newQudaGaugeParam(); inv_param = newQudaInvertParam(); gauge_param.X[0] = 12; gauge_param.X[1] = 12; gauge_param.X[2] = 12; gauge_param.X[3] = 12; setDims(gauge_param.X, Ls); gauge_param.anisotropy = 2.3; gauge_param.type = QUDA_WILSON_LINKS; gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER; gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T; gauge_param.cpu_prec = cpu_prec; gauge_param.cuda_prec = cuda_prec; gauge_param.reconstruct = QUDA_RECONSTRUCT_12; gauge_param.reconstruct_sloppy = gauge_param.reconstruct; gauge_param.cuda_prec_sloppy = gauge_param.cuda_prec; gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO; gauge_param.type = QUDA_WILSON_LINKS; inv_param.inv_type = QUDA_CG_INVERTER; inv_param.mass = 0.01; inv_param.m5 = -1.5; kappa5 = 0.5/(5 + inv_param.m5); inv_param.Ls = Ls; inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN; inv_param.dagger = dagger; inv_param.cpu_prec = cpu_prec; inv_param.cuda_prec = cuda_prec; gauge_param.ga_pad = 0; inv_param.sp_pad = 0; inv_param.cl_pad = 0; inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; inv_param.dirac_order = QUDA_DIRAC_ORDER; if (test_type == 2) { inv_param.solution_type = QUDA_MAT_SOLUTION; } else { inv_param.solution_type = QUDA_MATPC_SOLUTION; } inv_param.dslash_type = QUDA_DOMAIN_WALL_DSLASH; inv_param.verbosity = QUDA_VERBOSE; // construct input fields for (int dir = 0; dir < 4; dir++) hostGauge[dir] = malloc(V*gaugeSiteSize*gauge_param.cpu_prec); ColorSpinorParam csParam; csParam.fieldLocation = QUDA_CPU_FIELD_LOCATION; csParam.nColor = 3; csParam.nSpin = 4; csParam.nDim = 5; for (int d=0; d<4; d++) csParam.x[d] = gauge_param.X[d]; csParam.x[4] = Ls; csParam.precision = inv_param.cpu_prec; csParam.pad = 0; if (test_type < 2) { csParam.siteSubset = QUDA_PARITY_SITE_SUBSET; csParam.x[0] /= 2; } else { csParam.siteSubset = QUDA_FULL_SITE_SUBSET; } csParam.siteOrder = QUDA_EVEN_ODD_SITE_ORDER; csParam.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER; csParam.gammaBasis = inv_param.gamma_basis; csParam.create = QUDA_ZERO_FIELD_CREATE; spinor = new cpuColorSpinorField(csParam); spinorOut = new cpuColorSpinorField(csParam); spinorRef = new cpuColorSpinorField(csParam); csParam.siteSubset = QUDA_FULL_SITE_SUBSET; csParam.x[0] = gauge_param.X[0]; printfQuda("Randomizing fields... "); construct_gauge_field(hostGauge, 1, gauge_param.cpu_prec, &gauge_param); spinor->Source(QUDA_RANDOM_SOURCE); printfQuda("done.\n"); fflush(stdout); int dev = 0; initQuda(dev); printfQuda("Sending gauge field to GPU\n"); loadGaugeQuda(hostGauge, &gauge_param); if (!transfer) { csParam.fieldLocation = QUDA_CUDA_FIELD_LOCATION; csParam.gammaBasis = QUDA_UKQCD_GAMMA_BASIS; csParam.pad = inv_param.sp_pad; csParam.precision = inv_param.cuda_prec; if (csParam.precision == QUDA_DOUBLE_PRECISION ) { csParam.fieldOrder = QUDA_FLOAT2_FIELD_ORDER; } else { /* Single and half */ csParam.fieldOrder = QUDA_FLOAT4_FIELD_ORDER; } if (test_type < 2) { csParam.siteSubset = QUDA_PARITY_SITE_SUBSET; csParam.x[0] /= 2; } printfQuda("Creating cudaSpinor\n"); cudaSpinor = new cudaColorSpinorField(csParam); printfQuda("Creating cudaSpinorOut\n"); cudaSpinorOut = new cudaColorSpinorField(csParam); if (test_type == 2) csParam.x[0] /= 2; csParam.siteSubset = QUDA_PARITY_SITE_SUBSET; tmp = new cudaColorSpinorField(csParam); printfQuda("Sending spinor field to GPU\n"); *cudaSpinor = *spinor; std::cout << "Source: CPU = " << norm2(*spinor) << ", CUDA = " << norm2(*cudaSpinor) << std::endl; bool pc = (test_type != 2); DiracParam diracParam; setDiracParam(diracParam, &inv_param, pc); diracParam.verbose = QUDA_DEBUG_VERBOSE; diracParam.tmp1 = tmp; diracParam.tmp2 = tmp2; dirac = Dirac::create(diracParam); } else { std::cout << "Source: CPU = " << norm2(*spinor) << std::endl; } }
void init(int argc, char **argv) { kernelPackT = false; // Set true for kernel T face packing cuda_prec= prec; gauge_param = newQudaGaugeParam(); inv_param = newQudaInvertParam(); gauge_param.X[0] = xdim; gauge_param.X[1] = ydim; gauge_param.X[2] = zdim; gauge_param.X[3] = tdim; setDims(gauge_param.X); gauge_param.anisotropy = 1.0; gauge_param.type = QUDA_WILSON_LINKS; gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER; gauge_param.t_boundary = QUDA_PERIODIC_T; gauge_param.cpu_prec = cpu_prec; gauge_param.cuda_prec = cuda_prec; gauge_param.reconstruct = link_recon; gauge_param.reconstruct_sloppy = link_recon; gauge_param.cuda_prec_sloppy = cuda_prec; gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO; inv_param.kappa = 0.1; if (dslash_type == QUDA_TWISTED_MASS_DSLASH) { inv_param.mu = 0.01; inv_param.twist_flavor = QUDA_TWIST_MINUS; } inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN; inv_param.dagger = dagger; inv_param.cpu_prec = cpu_prec; if (inv_param.cpu_prec != gauge_param.cpu_prec) errorQuda("Gauge and spinor cpu precisions must match"); inv_param.cuda_prec = cuda_prec; #ifndef MULTI_GPU // free parameter for single GPU gauge_param.ga_pad = 0; #else // must be this one c/b face for multi gpu int x_face_size = gauge_param.X[1]*gauge_param.X[2]*gauge_param.X[3]/2; int y_face_size = gauge_param.X[0]*gauge_param.X[2]*gauge_param.X[3]/2; int z_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[3]/2; int t_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[2]/2; int pad_size =MAX(x_face_size, y_face_size); pad_size = MAX(pad_size, z_face_size); pad_size = MAX(pad_size, t_face_size); gauge_param.ga_pad = pad_size; #endif inv_param.sp_pad = 0; inv_param.cl_pad = 0; //inv_param.sp_pad = 24*24*24; //inv_param.cl_pad = 24*24*24; inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; // test code only supports DeGrand-Rossi Basis inv_param.dirac_order = QUDA_DIRAC_ORDER; if (test_type == 2) { inv_param.solution_type = QUDA_MAT_SOLUTION; } else { inv_param.solution_type = QUDA_MATPC_SOLUTION; } inv_param.dslash_type = dslash_type; if (dslash_type == QUDA_CLOVER_WILSON_DSLASH) { inv_param.clover_cpu_prec = cpu_prec; inv_param.clover_cuda_prec = cuda_prec; inv_param.clover_cuda_prec_sloppy = inv_param.clover_cuda_prec; inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER; //if (test_type > 0) { hostClover = malloc(V*cloverSiteSize*inv_param.clover_cpu_prec); hostCloverInv = hostClover; // fake it /*} else { hostClover = NULL; hostCloverInv = malloc(V*cloverSiteSize*inv_param.clover_cpu_prec); }*/ } else if (dslash_type == QUDA_TWISTED_MASS_DSLASH) { } //inv_param.verbosity = QUDA_VERBOSE; // construct input fields for (int dir = 0; dir < 4; dir++) hostGauge[dir] = malloc(V*gaugeSiteSize*gauge_param.cpu_prec); ColorSpinorParam csParam; csParam.fieldLocation = QUDA_CPU_FIELD_LOCATION; csParam.nColor = 3; csParam.nSpin = 4; if (dslash_type == QUDA_TWISTED_MASS_DSLASH) { csParam.twistFlavor = inv_param.twist_flavor; } csParam.nDim = 4; for (int d=0; d<4; d++) csParam.x[d] = gauge_param.X[d]; csParam.precision = inv_param.cpu_prec; csParam.pad = 0; if (test_type < 2) { csParam.siteSubset = QUDA_PARITY_SITE_SUBSET; csParam.x[0] /= 2; } else { csParam.siteSubset = QUDA_FULL_SITE_SUBSET; } csParam.siteOrder = QUDA_EVEN_ODD_SITE_ORDER; csParam.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER; csParam.gammaBasis = inv_param.gamma_basis; csParam.create = QUDA_ZERO_FIELD_CREATE; //csParam.verbose = QUDA_DEBUG_VERBOSE; spinor = new cpuColorSpinorField(csParam); spinorOut = new cpuColorSpinorField(csParam); spinorRef = new cpuColorSpinorField(csParam); csParam.siteSubset = QUDA_FULL_SITE_SUBSET; csParam.x[0] = gauge_param.X[0]; printfQuda("Randomizing fields... "); if (strcmp(latfile,"")) { // load in the command line supplied gauge field read_gauge_field(latfile, hostGauge, gauge_param.cpu_prec, gauge_param.X, argc, argv); construct_gauge_field(hostGauge, 2, gauge_param.cpu_prec, &gauge_param); } else { // else generate a random SU(3) field construct_gauge_field(hostGauge, 1, gauge_param.cpu_prec, &gauge_param); } spinor->Source(QUDA_RANDOM_SOURCE); if (dslash_type == QUDA_CLOVER_WILSON_DSLASH) { double norm = 0.0; // clover components are random numbers in the range (-norm, norm) double diag = 1.0; // constant added to the diagonal if (test_type == 2) { construct_clover_field(hostClover, norm, diag, inv_param.clover_cpu_prec); } else { construct_clover_field(hostCloverInv, norm, diag, inv_param.clover_cpu_prec); } } printfQuda("done.\n"); fflush(stdout); initQuda(device); printfQuda("Sending gauge field to GPU\n"); loadGaugeQuda(hostGauge, &gauge_param); if (dslash_type == QUDA_CLOVER_WILSON_DSLASH) { printfQuda("Sending clover field to GPU\n"); loadCloverQuda(hostClover, hostCloverInv, &inv_param); //clover = cudaCloverPrecise; } if (!transfer) { csParam.fieldLocation = QUDA_CUDA_FIELD_LOCATION; csParam.gammaBasis = QUDA_UKQCD_GAMMA_BASIS; csParam.pad = inv_param.sp_pad; csParam.precision = inv_param.cuda_prec; if (csParam.precision == QUDA_DOUBLE_PRECISION ) { csParam.fieldOrder = QUDA_FLOAT2_FIELD_ORDER; } else { /* Single and half */ csParam.fieldOrder = QUDA_FLOAT4_FIELD_ORDER; } if (test_type < 2) { csParam.siteSubset = QUDA_PARITY_SITE_SUBSET; csParam.x[0] /= 2; } printfQuda("Creating cudaSpinor\n"); cudaSpinor = new cudaColorSpinorField(csParam); printfQuda("Creating cudaSpinorOut\n"); cudaSpinorOut = new cudaColorSpinorField(csParam); if (test_type == 2) csParam.x[0] /= 2; csParam.siteSubset = QUDA_PARITY_SITE_SUBSET; tmp1 = new cudaColorSpinorField(csParam); if (dslash_type == QUDA_CLOVER_WILSON_DSLASH || dslash_type == QUDA_TWISTED_MASS_DSLASH) { tmp2 = new cudaColorSpinorField(csParam); } printfQuda("Sending spinor field to GPU\n"); *cudaSpinor = *spinor; std::cout << "Source: CPU = " << norm2(*spinor) << ", CUDA = " << norm2(*cudaSpinor) << std::endl; bool pc = (test_type != 2); DiracParam diracParam; setDiracParam(diracParam, &inv_param, pc); diracParam.verbose = QUDA_VERBOSE; diracParam.tmp1 = tmp1; diracParam.tmp2 = tmp2; dirac = Dirac::create(diracParam); } else { std::cout << "Source: CPU = " << norm2(*spinor) << std::endl; } }
void init() { initQuda(device); gaugeParam = newQudaGaugeParam(); inv_param = newQudaInvertParam(); gaugeParam.X[0] = X[0] = xdim; gaugeParam.X[1] = X[1] = ydim; gaugeParam.X[2] = X[2] = zdim; gaugeParam.X[3] = X[3] = tdim; setDims(gaugeParam.X); setSpinorSiteSize(6); gaugeParam.cpu_prec = QUDA_DOUBLE_PRECISION; gaugeParam.cuda_prec = prec; gaugeParam.reconstruct = link_recon; gaugeParam.reconstruct_sloppy = gaugeParam.reconstruct; gaugeParam.cuda_prec_sloppy = gaugeParam.cuda_prec; gaugeParam.anisotropy = 1.0; gaugeParam.tadpole_coeff = 0.8; gaugeParam.gauge_order = QUDA_QDP_GAUGE_ORDER; gaugeParam.t_boundary = QUDA_ANTI_PERIODIC_T; gaugeParam.gauge_fix = QUDA_GAUGE_FIXED_NO; gaugeParam.gaugeGiB = 0; inv_param.cpu_prec = QUDA_DOUBLE_PRECISION; inv_param.cuda_prec = prec; inv_param.dirac_order = QUDA_DIRAC_ORDER; inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; inv_param.dagger = dagger; inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN; inv_param.dslash_type = QUDA_ASQTAD_DSLASH; inv_param.input_location = QUDA_CPU_FIELD_LOCATION; inv_param.output_location = QUDA_CPU_FIELD_LOCATION; int tmpint = MAX(X[1]*X[2]*X[3], X[0]*X[2]*X[3]); tmpint = MAX(tmpint, X[0]*X[1]*X[3]); tmpint = MAX(tmpint, X[0]*X[1]*X[2]); gaugeParam.ga_pad = tmpint; inv_param.sp_pad = tmpint; ColorSpinorParam csParam; csParam.fieldLocation = QUDA_CPU_FIELD_LOCATION; csParam.nColor=3; csParam.nSpin=1; csParam.nDim=4; for(int d = 0; d < 4; d++) { csParam.x[d] = gaugeParam.X[d]; } csParam.precision = inv_param.cpu_prec; csParam.pad = 0; if (test_type < 2) { inv_param.solution_type = QUDA_MATPC_SOLUTION; csParam.siteSubset = QUDA_PARITY_SITE_SUBSET; csParam.x[0] /= 2; } else { inv_param.solution_type = QUDA_MAT_SOLUTION; csParam.siteSubset = QUDA_FULL_SITE_SUBSET; } csParam.siteOrder = QUDA_EVEN_ODD_SITE_ORDER; csParam.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER; csParam.gammaBasis = inv_param.gamma_basis; // this parameter is meaningless for staggered csParam.create = QUDA_ZERO_FIELD_CREATE; spinor = new cpuColorSpinorField(csParam); spinorOut = new cpuColorSpinorField(csParam); spinorRef = new cpuColorSpinorField(csParam); csParam.siteSubset = QUDA_FULL_SITE_SUBSET; csParam.x[0] = gaugeParam.X[0]; printfQuda("Randomizing fields ...\n"); spinor->Source(QUDA_RANDOM_SOURCE); size_t gSize = (gaugeParam.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float); for (int dir = 0; dir < 4; dir++) { fatlink[dir] = malloc(V*gaugeSiteSize*gSize); longlink[dir] = malloc(V*gaugeSiteSize*gSize); } if (fatlink == NULL || longlink == NULL){ errorQuda("ERROR: malloc failed for fatlink/longlink"); } construct_fat_long_gauge_field(fatlink, longlink, 1, gaugeParam.cpu_prec, &gaugeParam); #ifdef MULTI_GPU gaugeParam.type = QUDA_ASQTAD_FAT_LINKS; gaugeParam.reconstruct = QUDA_RECONSTRUCT_NO; GaugeFieldParam cpuFatParam(fatlink, gaugeParam); cpuFat = new cpuGaugeField(cpuFatParam); cpuFat->exchangeGhost(); ghost_fatlink = cpuFat->Ghost(); gaugeParam.type = QUDA_ASQTAD_LONG_LINKS; GaugeFieldParam cpuLongParam(longlink, gaugeParam); cpuLong = new cpuGaugeField(cpuLongParam); cpuLong->exchangeGhost(); ghost_longlink = cpuLong->Ghost(); int x_face_size = X[1]*X[2]*X[3]/2; int y_face_size = X[0]*X[2]*X[3]/2; int z_face_size = X[0]*X[1]*X[3]/2; int t_face_size = X[0]*X[1]*X[2]/2; int pad_size =MAX(x_face_size, y_face_size); pad_size = MAX(pad_size, z_face_size); pad_size = MAX(pad_size, t_face_size); gaugeParam.ga_pad = pad_size; #endif gaugeParam.type = QUDA_ASQTAD_FAT_LINKS; gaugeParam.reconstruct = gaugeParam.reconstruct_sloppy = QUDA_RECONSTRUCT_NO; printfQuda("Fat links sending..."); loadGaugeQuda(fatlink, &gaugeParam); printfQuda("Fat links sent\n"); gaugeParam.type = QUDA_ASQTAD_LONG_LINKS; #ifdef MULTI_GPU gaugeParam.ga_pad = 3*pad_size; #endif gaugeParam.reconstruct = gaugeParam.reconstruct_sloppy = link_recon; printfQuda("Long links sending..."); loadGaugeQuda(longlink, &gaugeParam); printfQuda("Long links sent...\n"); printfQuda("Sending fields to GPU..."); if (!transfer) { //csParam.verbose = QUDA_DEBUG_VERBOSE; csParam.fieldLocation = QUDA_CUDA_FIELD_LOCATION; csParam.fieldOrder = QUDA_FLOAT2_FIELD_ORDER; csParam.pad = inv_param.sp_pad; csParam.precision = inv_param.cuda_prec; if (test_type < 2){ csParam.siteSubset = QUDA_PARITY_SITE_SUBSET; csParam.x[0] /=2; } printfQuda("Creating cudaSpinor\n"); cudaSpinor = new cudaColorSpinorField(csParam); printfQuda("Creating cudaSpinorOut\n"); cudaSpinorOut = new cudaColorSpinorField(csParam); printfQuda("Sending spinor field to GPU\n"); *cudaSpinor = *spinor; cudaDeviceSynchronize(); checkCudaError(); double spinor_norm2 = norm2(*spinor); double cuda_spinor_norm2= norm2(*cudaSpinor); printfQuda("Source CPU = %f, CUDA=%f\n", spinor_norm2, cuda_spinor_norm2); if(test_type == 2){ csParam.x[0] /=2; } csParam.siteSubset = QUDA_PARITY_SITE_SUBSET; tmp = new cudaColorSpinorField(csParam); bool pc = (test_type != 2); DiracParam diracParam; setDiracParam(diracParam, &inv_param, pc); diracParam.verbose = QUDA_VERBOSE; diracParam.tmp1=tmp; dirac = Dirac::create(diracParam); } else { errorQuda("Error not suppported"); } return; }