void Benchmarker::run(int runTimes) { OCLContext * ocl = OCLContext::getInstance(); int totalSize = inputImage.getWidth() * inputImage.getHeight() * 4; // error code cl_int ciErrNum; // events and profiling vars cl_event eventGlobal; cl_int errcode_ret; cl_ulong end, start; size_t globalWorkSize[] = { inputImage.getWidth(), inputImage.getHeight() }; for (int i = 0; i < kernels.size(); i++) { Kernel * kernel = kernels[i]; kernel->clear(); // get local work size size_t localWorkSize[] = { kernel->getLocalWorkSizeX(), kernel->getLocalWorkSizeY() }; cout << kernel->filename << "\t\t\t" << flush; bool failure = false; bool skipped = SKIP_EXCESS_LOCAL_MEM && kernel->usesLocalMem() && kernel->getLocalMemSize() > MAX_LOCAL_MEM_SIZE ? true : false; if (skipped) { goto skipSim; } for (int t = 0; t < runTimes; t++) { // run all kernels ciErrNum = clEnqueueNDRangeKernel( ocl->getQueue(), kernel->getKernel(), 2, NULL, globalWorkSize, kernel->usesLocalMem() ? localWorkSize : NULL, 0, 0, &eventGlobal ); if (ciErrNum != CL_SUCCESS) { failure = true; break; } // lets do some profiling errcode_ret = clWaitForEvents(1, &eventGlobal); oclCheckError(errcode_ret, CL_SUCCESS); errcode_ret = clGetEventProfilingInfo(eventGlobal, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, 0); errcode_ret |= clGetEventProfilingInfo(eventGlobal, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, 0); kernel->runTimes.push_back( (end-start)*1.0e-6f ); } skipSim: if (failure || skipped) { /* memset( kernel->results, 0, totalSize * sizeof(float) ); kernel->makeOpenGLTexture(); */ cout << (skipped ? "exceeds local mem" : "FAILED !"); cout << endl; } else { // read alien / milirary data ciErrNum = clEnqueueReadBuffer(ocl->getQueue(), output_cl, true, 0, totalSize * sizeof(float), kernel->results, 0, 0, 0); shrCheckErrorEX(ciErrNum, CL_SUCCESS, &OCLContext::Cleanup); kernel->makeOpenGLTexture(); // average run times kernel->averageRunTimes(); cout << fixed << setprecision( 1 ) << kernel->avgRunTime << endl; cout << flush; } } }