/**
 * Waits for @p event to finish and returns the time between its
 * CL_PROFILING_COMMAND_START and CL_PROFILING_COMMAND_END counters,
 * scaled by 1e-9.
 *
 * NOTE(review): OpenCL reports these counters in nanoseconds, so the result
 * is presumably seconds; the counters are only meaningful when the owning
 * queue was created with profiling enabled -- confirm at the call sites.
 *
 * @param event  Event to wait on and query.
 * @return       (end - start) * 1e-9 as a double.
 */
static inline double get_time(cl::Event& event)
{
    // The profiling counters are only valid once the command has completed.
    event.wait();

    const cl_ulong startedAt  = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
    const cl_ulong finishedAt = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();

    // Subtract in the unsigned counter domain first, then scale.
    const cl_ulong elapsed = finishedAt - startedAt;
    return elapsed * 1.0e-9;
}
/**
 * Blocks until the command associated with @p e has finished, using the wait
 * strategy configured on @p ctl.
 *
 *  - BusyWait:               flushes the control's command queue, then polls
 *                            the event's execution status until it is no
 *                            longer pending.
 *  - NiceWait / BalancedWait: calls e.wait().
 *  - ClFinish:               calls finish() on the control's command queue.
 *  - any other mode:         returns immediately without waiting.
 *
 * Error codes are passed to V_OPENCL, the error-check macro already used by
 * this function (defined elsewhere; presumably it reports or throws on codes
 * other than CL_SUCCESS -- confirm against its definition).
 *
 * @param ctl  Control object supplying the wait mode and command queue.
 * @param e    Event to wait for.
 */
void wait(const bolt::cl::control &ctl, ::cl::Event &e)
{
    const bolt::cl::control::e_WaitMode waitMode = ctl.getWaitMode();

    if (waitMode == bolt::cl::control::BusyWait) {
        const ::cl::CommandQueue& q = ctl.getCommandQueue();

        // Make sure the command has actually been submitted before spinning on it.
        cl_int l_Error = q.flush();
        V_OPENCL( l_Error, "clFlush call failed" );

        // Spin here for fast completion detection.
        // Pending states (queued/submitted/running) are positive, CL_COMPLETE
        // is zero, and an abnormally terminated command reports a negative
        // error code. Polling for "!= CL_COMPLETE" would therefore never
        // return if the command failed, so only spin while the status is
        // still positive.
        cl_int execStatus = CL_COMPLETE;
        do {
            cl_int queryError = CL_SUCCESS;
            execStatus = e.getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>(&queryError);
            // A failed query leaves execStatus unspecified; report it rather
            // than acting on garbage.
            V_OPENCL( queryError, "querying event execution status failed" );
        } while (execStatus > CL_COMPLETE);

        if (execStatus < CL_COMPLETE) {
            // The negative status is the error code of the failed command.
            V_OPENCL( execStatus, "command terminated abnormally while busy-waiting" );
        }
    } else if ((waitMode == bolt::cl::control::NiceWait) ||
               (waitMode == bolt::cl::control::BalancedWait)) {
        cl_int l_Error = e.wait();
        V_OPENCL( l_Error, "wait call failed" );
    } else if (waitMode == bolt::cl::control::ClFinish) {
        const ::cl::CommandQueue& q = ctl.getCommandQueue();
        cl_int l_Error = q.finish();
        V_OPENCL( l_Error, "clFinish call failed" );
    }
}
int MaxValueSimple::maxValueCL(int* values, size_t len) { try { cl_int status = CL_SUCCESS; /*** Ausgabe von Informationen ueber gewaehltes OpenCL-Device ***/ /* TODO logging Logger::logDebug( METHOD, Logger::sStream << "max compute units: " << devices[0].getInfo< CL_DEVICE_MAX_COMPUTE_UNITS> ()); Logger::logDebug( METHOD, Logger::sStream << "max work item sizes: " << devices[0].getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES> ()[0]); Logger::logDebug( METHOD, Logger::sStream << "max work group sizes: " << devices[0].getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE> ()); Logger::logDebug( METHOD, Logger::sStream << "max global mem size (KB): " << devices[0].getInfo<CL_DEVICE_GLOBAL_MEM_SIZE> () / 1024); Logger::logDebug( METHOD, Logger::sStream << "max local mem size (KB): " << devices[0].getInfo<CL_DEVICE_LOCAL_MEM_SIZE> () / 1024); */ /*** Erstellen und Vorbereiten der Daten ***/ cl::Buffer vBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_int) * len, &values[0], &status); if (status != CL_SUCCESS) { throw cl::Error(status, "cl::Buffer values"); } cmdQ.finish(); /*** Arbeitsgroeszen berechnen ***/ // Anzahl der Work-Items = globalSize // Work-Items pro Work-Group = localSize const size_t MAX_GROUP_SIZE = devices[0].getInfo< CL_DEVICE_MAX_WORK_GROUP_SIZE> (); size_t globalSize; size_t localSize; do { globalSize = len; localSize = MaxValueSimple::calcWorkGroupSize(globalSize, MAX_GROUP_SIZE); if (localSize == 1) { globalSize = ceil((double) len / WG_FAC) * WG_FAC; localSize = MaxValueSimple::calcWorkGroupSize(globalSize, MAX_GROUP_SIZE); /* TODO logging Logger::logDebug( METHOD, Logger::sStream << "GlobalSize has been extended to " << globalSize); */ } /* TODO logging Logger::logDebug(METHOD, Logger::sStream << "globalSize: " << globalSize); Logger::logDebug(METHOD, Logger::sStream << "localSize: " << localSize); */ /*** Kernel-Argumente setzen ***/ status = kernel.setArg(0, vBuffer); if (status != CL_SUCCESS) { throw cl::Error(status, "Kernel.SetArg"); } status = 
kernel.setArg(1, sizeof(cl_int) * localSize, NULL); if (status != CL_SUCCESS) { throw cl::Error(status, "Kernel.SetArg"); } /*** Kernel ausfuehren und auf Abarbeitung warten ***/ cl::KernelFunctor func = kernel.bind(cmdQ, cl::NDRange(globalSize), cl::NDRange(localSize)); event = func(); event.wait(); cmdQ.finish(); /* runtimeKernel += event.getProfilingInfo<CL_PROFILING_COMMAND_END> (); runtimeKernel -= event.getProfilingInfo<CL_PROFILING_COMMAND_START> (); */ len = globalSize / localSize; } while (globalSize > localSize && localSize > 1); /*** Daten vom OpenCL-Device holen ***/ // TODO nur 1. element auslesen status = cmdQ.enqueueReadBuffer(vBuffer, true, 0, sizeof(cl_int) * 1, &values[0]); if (status != CL_SUCCESS) { throw cl::Error(status, "CommandQueue.enqueueReadBuffer"); } /* TODO logging Logger::log( METHOD, TIME, Logger::sStream << "timeKernel=" << 1.0e-9 * runtimeKernel << ";"); */ return values[0]; } catch (cl::Error& err) { // TODO Logger::logError(METHOD, Logger::sStream << err.what()); std::cerr << "[ERROR] MaxValueSimple::maxValueCL(int*, size_t): " << err.what() << " (" << err.err() << ")" << std::endl; return MaxValueSimple::MAX_FAILURE; } catch (std::exception& err) { // TODO Logger::logError(METHOD, Logger::sStream << err.what()); std::cerr << "[ERROR] MaxValueSimple::maxValueCL(int*, size_t): " << err.what() << std::endl; return MaxValueSimple::MAX_FAILURE; } }
/**
 * Queries profiling timestamps of @p ev into the caller's variables.
 *
 * The return codes of the individual queries are not checked.
 *
 * @param ev         Event to query.
 * @param cmdStart   Receives CL_PROFILING_COMMAND_START.
 * @param cmdEnd     Receives CL_PROFILING_COMMAND_END.
 * @param cmdSubmit  Optional; when non-null, receives
 *                   CL_PROFILING_COMMAND_SUBMIT.
 */
void getProfilingInfo(const cl::Event &ev, cl_ulong* cmdStart, cl_ulong* cmdEnd, cl_ulong* cmdSubmit)
{
    ev.getProfilingInfo(CL_PROFILING_COMMAND_START, cmdStart);
    ev.getProfilingInfo(CL_PROFILING_COMMAND_END, cmdEnd);

    // The submit timestamp is only fetched when the caller asked for it.
    if (!cmdSubmit) {
        return;
    }
    ev.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, cmdSubmit);
}