// execute kernel
//
// Runs the operator selected by `test_type` for `loops` iterations and
// returns the elapsed time in seconds, measured with CUDA events recorded
// on stream 0.
//
//   test_type 0    : Dslash (dslashQuda or dirac->Dslash)
//   test_type 1, 2 : M      (MatQuda    or dirac->M)
//   other values   : no work is issued inside the timed loop
//
// When `transfer` is set, the interface entry points are called with the
// V() pointers of `spinor` / `spinorOut`; otherwise the `dirac` object is
// applied to `cudaSpinor` / `cudaSpinorOut`.
// NOTE(review): `loops`, `test_type`, `transfer`, `parity`, `inv_param`,
// `dirac`, `tmp` and the spinor fields are declared outside this block
// (presumably file-scope test state) -- confirm against the rest of the file.
//
// On a CUDA failure the error string is printed; the function still returns.
// If the elapsed-time query itself fails, 0 is returned instead of an
// indeterminate value.
double dslashCUDA() {
  printfQuda("Executing %d kernel loops...\n", loops);
  fflush(stdout);

  // Tune before timing: full fields for test_type < 2, the Even() subsets
  // otherwise.
  if (test_type < 2)
    dirac->Tune(*cudaSpinorOut, *cudaSpinor, *tmp);
  else
    dirac->Tune(cudaSpinorOut->Even(), cudaSpinor->Even(), *tmp);

  // Create both events up front so no event creation sits between the timed
  // work and the recording of the stop event.
  cudaEvent_t start, end;
  cudaEventCreate(&start);
  cudaEventCreate(&end);

  cudaEventRecord(start, 0);
  cudaEventSynchronize(start);

  for (int i = 0; i < loops; i++) {
    switch (test_type) {
    case 0:
      if (transfer) {
        dslashQuda(spinorOut->V(), spinor->V(), &inv_param, parity);
      } else {
        dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
      }
      break;
    case 1:
    case 2:
      if (transfer) {
        MatQuda(spinorOut->V(), spinor->V(), &inv_param);
      } else {
        dirac->M(*cudaSpinorOut, *cudaSpinor);
      }
      break;
    }
  }

  cudaEventRecord(end, 0);
  cudaEventSynchronize(end);

  // Zero-initialised: cudaEventElapsedTime leaves its output untouched when
  // it fails, and reading an uninitialised float would be undefined.
  float runTime = 0.0f;
  cudaEventElapsedTime(&runTime, start, end); // milliseconds
  cudaEventDestroy(start);
  cudaEventDestroy(end);

  double secs = runTime / 1000; // ms -> s

  // check for errors
  // cudaGetLastError() also reports a failure of any of the runtime calls
  // above, since successful calls do not clear the last-error state.
  cudaError_t stat = cudaGetLastError();
  if (stat != cudaSuccess)
    printf("with ERROR: %s\n", cudaGetErrorString(stat));

  printf("done.\n\n");

  return secs;
}
// execute kernel
//
// Runs the operator selected by `test_type` `niter` times and returns the
// elapsed time in seconds, measured with CUDA events recorded on stream 0.
//
//   test_type 0    : Dslash   (dslashQuda    or dirac->Dslash)
//   test_type 1, 2 : M        (MatQuda       or dirac->M)
//   test_type 3, 4 : MdagM    (MatDagMatQuda or dirac->MdagM)
//   other values   : no work is issued inside the timed loop
//
// When `transfer` is set, the interface entry points are called with the
// V() pointers of `spinor` / `spinorOut`; otherwise the `dirac` object is
// applied to `cudaSpinor` / `cudaSpinorOut`.
// NOTE(review): `test_type`, `transfer`, `parity`, `inv_param`, `dirac` and
// the spinor fields are declared outside this block (presumably file-scope
// test state) -- confirm against the rest of the file.
//
// @param niter  number of times the selected operator is applied
// @return       elapsed seconds between the start and end events; 0 if the
//               elapsed-time query fails (the CUDA error is still printed)
double dslashCUDA(int niter) {
  cudaEvent_t start, end;
  cudaEventCreate(&start);
  cudaEventCreate(&end);
  cudaEventRecord(start, 0);

  for (int i = 0; i < niter; i++) {
    switch (test_type) {
    case 0:
      if (transfer) {
        dslashQuda(spinorOut->V(), spinor->V(), &inv_param, parity);
      } else {
        // Disabled alternative: drive the interface with device-resident fields.
        //inv_param.input_location = QUDA_CUDA_FIELD_LOCATION;
        //inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
        //dslashQuda(cudaSpinorOut->V(), cudaSpinor->V(), &inv_param, parity);
        dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
      }
      break;
    case 1:
    case 2:
      if (transfer) {
        MatQuda(spinorOut->V(), spinor->V(), &inv_param);
      } else {
        dirac->M(*cudaSpinorOut, *cudaSpinor);
      }
      break;
    case 3:
    case 4:
      if (transfer) {
        MatDagMatQuda(spinorOut->V(), spinor->V(), &inv_param);
      } else {
        dirac->MdagM(*cudaSpinorOut, *cudaSpinor);
      }
      break;
    }
  }

  cudaEventRecord(end, 0);
  cudaEventSynchronize(end);

  // Zero-initialised: cudaEventElapsedTime leaves its output untouched when
  // it fails, and reading an uninitialised float would be undefined.
  float runTime = 0.0f;
  cudaEventElapsedTime(&runTime, start, end); // milliseconds
  cudaEventDestroy(start);
  cudaEventDestroy(end);

  double secs = runTime / 1000; // ms -> s

  // check for errors
  // cudaGetLastError() also reports a failure of any of the runtime calls
  // above, since successful calls do not clear the last-error state.
  cudaError_t stat = cudaGetLastError();
  if (stat != cudaSuccess)
    printfQuda("with ERROR: %s\n", cudaGetErrorString(stat));

  return secs;
}