// if even_odd_flag set void M_full_quda(spinor * const Even_new, spinor * const Odd_new, spinor * const Even, spinor * const Odd) { inv_param.kappa = g_kappa; inv_param.mu = fabs(g_mu); inv_param.epsilon = 0.0; // IMPORTANT: use opposite TM flavor since gamma5 -> -gamma5 (until LXLYLZT prob. resolved) inv_param.twist_flavor = (g_mu < 0.0 ? QUDA_TWIST_PLUS : QUDA_TWIST_MINUS); inv_param.Ls = (inv_param.twist_flavor == QUDA_TWIST_NONDEG_DOUBLET || inv_param.twist_flavor == QUDA_TWIST_DEG_DOUBLET ) ? 2 : 1; void *spinorIn = (void*)g_spinor_field[DUM_DERI]; // source void *spinorOut = (void*)g_spinor_field[DUM_DERI+1]; // solution // reorder spinor convert_eo_to_lexic( spinorIn, Even, Odd ); reorder_spinor_toQuda( (double*)spinorIn, inv_param.cpu_prec, 0, NULL ); // multiply inv_param.solution_type = QUDA_MAT_SOLUTION; MatQuda( spinorOut, spinorIn, &inv_param); // reorder spinor reorder_spinor_fromQuda( (double*)spinorOut, inv_param.cpu_prec, 0, NULL ); convert_lexic_to_eo( Even_new, Odd_new, spinorOut ); }
// execute kernel double dslashCUDA() { printfQuda("Executing %d kernel loops...\n", loops); fflush(stdout); if (test_type < 2) dirac->Tune(*cudaSpinorOut, *cudaSpinor, *tmp); else dirac->Tune(cudaSpinorOut->Even(), cudaSpinor->Even(), *tmp); cudaEvent_t start, end; cudaEventCreate(&start); cudaEventRecord(start, 0); cudaEventSynchronize(start); for (int i = 0; i < loops; i++) { switch (test_type) { case 0: if (transfer) { dslashQuda(spinorOut->V(), spinor->V(), &inv_param, parity); } else { dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity); } break; case 1: case 2: if (transfer) { MatQuda(spinorOut->V(), spinor->V(), &inv_param); } else { dirac->M(*cudaSpinorOut, *cudaSpinor); } break; } } cudaEventCreate(&end); cudaEventRecord(end, 0); cudaEventSynchronize(end); float runTime; cudaEventElapsedTime(&runTime, start, end); cudaEventDestroy(start); cudaEventDestroy(end); double secs = runTime / 1000; //stopwatchReadSeconds(); // check for errors cudaError_t stat = cudaGetLastError(); if (stat != cudaSuccess) printf("with ERROR: %s\n", cudaGetErrorString(stat)); printf("done.\n\n"); return secs; }
// execute kernel double dslashCUDA(int niter) { cudaEvent_t start, end; cudaEventCreate(&start); cudaEventCreate(&end); cudaEventRecord(start, 0); for (int i = 0; i < niter; i++) { switch (test_type) { case 0: if (transfer) { dslashQuda(spinorOut->V(), spinor->V(), &inv_param, parity); } else { //inv_param.input_location = QUDA_CUDA_FIELD_LOCATION; //inv_param.output_location = QUDA_CUDA_FIELD_LOCATION; //dslashQuda(cudaSpinorOut->V(), cudaSpinor->V(), &inv_param, parity); dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity); } break; case 1: case 2: if (transfer) { MatQuda(spinorOut->V(), spinor->V(), &inv_param); } else { dirac->M(*cudaSpinorOut, *cudaSpinor); } break; case 3: case 4: if (transfer) { MatDagMatQuda(spinorOut->V(), spinor->V(), &inv_param); } else { dirac->MdagM(*cudaSpinorOut, *cudaSpinor); } break; } } cudaEventRecord(end, 0); cudaEventSynchronize(end); float runTime; cudaEventElapsedTime(&runTime, start, end); cudaEventDestroy(start); cudaEventDestroy(end); double secs = runTime / 1000; //stopwatchReadSeconds(); // check for errors cudaError_t stat = cudaGetLastError(); if (stat != cudaSuccess) printfQuda("with ERROR: %s\n", cudaGetErrorString(stat)); return secs; }
// no even-odd void D_psi_quda(spinor * const P, spinor * const Q) { inv_param.kappa = g_kappa; inv_param.mu = fabs(g_mu); inv_param.epsilon = 0.0; // IMPORTANT: use opposite TM flavor since gamma5 -> -gamma5 (until LXLYLZT prob. resolved) inv_param.twist_flavor = (g_mu < 0.0 ? QUDA_TWIST_PLUS : QUDA_TWIST_MINUS); inv_param.Ls = (inv_param.twist_flavor == QUDA_TWIST_NONDEG_DOUBLET || inv_param.twist_flavor == QUDA_TWIST_DEG_DOUBLET ) ? 2 : 1; void *spinorIn = (void*)Q; void *spinorOut = (void*)P; // reorder spinor reorder_spinor_toQuda( (double*)spinorIn, inv_param.cpu_prec, 0, NULL ); // multiply inv_param.solution_type = QUDA_MAT_SOLUTION; MatQuda( spinorOut, spinorIn, &inv_param); // reorder spinor reorder_spinor_fromQuda( (double*)spinorIn, inv_param.cpu_prec, 0, NULL ); reorder_spinor_fromQuda( (double*)spinorOut, inv_param.cpu_prec, 0, NULL ); }