/**
 * Choose a local work-group shape for the global range @p wsDim.
 *
 * For each dimension the candidate local sizes are the values returned by
 * factor() for that dimension's global extent, discarding any candidate that
 * exceeds the device's per-dimension limit (CL_DEVICE_MAX_WORK_ITEM_SIZES).
 * maximize() then selects one candidate per dimension subject to the device's
 * CL_DEVICE_MAX_WORK_GROUP_SIZE limit.  Results are cached per global range so
 * the search runs once for each distinct range.
 *
 * NOTE(review): factor() is assumed to return its values in ascending order
 * (std::upper_bound requires a sorted sequence) and maximize() is assumed to
 * return one size per dimension - confirm against their definitions.
 *
 * This factor-based search supersedes an earlier heuristic that clamped each
 * dimension to the device limit and halved dimensions round-robin until the
 * product fitted within the work-group limit.
 *
 * @param wsDim global work size (1 to 3 dimensions).
 * @return the chosen local range, or cl::NullRange when maximize() does not
 *         yield 1, 2 or 3 sizes (that outcome is not cached).
 */
cl::NDRange getBestWorkspaceDim(cl::NDRange wsDim)
{
    // Device limits are queried once, on the first call.
    static std::vector<size_t> MaxDims =
        CLContextLoader::getDevice().getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
    static size_t totMax =
        CLContextLoader::getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();

    // The cache is keyed on the extents themselves.  Keying on cl::NDRange
    // directly is unsafe: unless the project supplies an operator< for it,
    // the comparison goes through NDRange's implicit conversion to
    // const size_t* and therefore orders by object address, so lookups never
    // match and the map grows on every call.
    typedef std::vector<size_t> memo_key;
    typedef std::map<memo_key, cl::NDRange> memo_map;
    static memo_map memoing;

    const size_t nDims = wsDim.dimensions();

    memo_key key(nDims);
    for (size_t i = 0; i < nDims; ++i)
    {
        key[i] = wsDim[i];
    }

    const memo_map::const_iterator cached = memoing.find(key);
    if (cached != memoing.end())
    {
        return cached->second;
    }

    // Per-dimension candidate sizes, trimmed to the device's per-dimension cap.
    std::vector<std::vector<size_t> > v(nDims);
    for (size_t i = 0; i < nDims; ++i)
    {
        std::vector<size_t> s = factor(wsDim[i]);
        s.erase(std::upper_bound(s.begin(), s.end(), MaxDims[i]), s.end());
        v[i] = s;
    }

    std::vector<size_t> dims = maximize(v.begin(), v.end(), totMax);

    cl::NDRange best = cl::NullRange;
    switch (dims.size())
    {
    case 1:
        best = cl::NDRange(dims[0]);
        break;
    case 2:
        best = cl::NDRange(dims[0], dims[1]);
        break;
    case 3:
        best = cl::NDRange(dims[0], dims[1], dims[2]);
        break;
    default:
        // Unsupported dimensionality: report "no preference" without caching.
        return cl::NullRange;
    }

    memoing.insert(std::make_pair(key, best));
    return best;
}
/**
 * Enqueue @p kernel on this chunk's command queue, optionally timing it, and
 * abort with a diagnostic if the enqueue fails.
 *
 * When profiler_on is set the call blocks until the kernel has finished, reads
 * the start/end profiling counters from its event and adds the elapsed time to
 * kernel_times / bumps kernel_calls for the kernel's function name.  Otherwise
 * the kernel is simply enqueued.
 *
 * @param kernel        kernel to launch.
 * @param line          source line of the call site (used in error messages).
 * @param file          source file of the call site (used in error messages).
 * @param offset_range  global work offset.
 * @param global_range  global work size.
 * @param local_range   local work-group size.
 * @param events        events to wait on before the kernel runs (may be NULL).
 * @param event         receives the kernel's event (may be NULL).
 *
 * Any cl::Error raised is reported through DIE(); for an invalid work-group
 * size the per-dimension global/local sizes are included in the message.
 */
void CloverChunk::enqueueKernel
(cl::Kernel const& kernel,
 int line, const char* file,
 const cl::NDRange offset_range,
 const cl::NDRange global_range,
 const cl::NDRange local_range,
 const std::vector< cl::Event > * const events,
 cl::Event * const event)
{
    try
    {
        if (profiler_on)
        {
            // time it
            cl::Event *prof_event;
            cl_ulong start, end;

            // used if no event was passed
            static cl::Event no_event_passed = cl::Event();

            if (event != NULL)
            {
                prof_event = event;
            }
            else
            {
                prof_event = &no_event_passed;
            }

            std::string func_name;
            kernel.getInfo(CL_KERNEL_FUNCTION_NAME, &func_name);

#if 0
            fprintf(stdout, "Enqueueing kernel: %s\n", func_name.c_str());
            fprintf(stdout, "%zu global dimensions\n", global_range.dimensions());
            fprintf(stdout, "%zu local dimensions\n", local_range.dimensions());
            fprintf(stdout, "%zu offset dimensions\n", offset_range.dimensions());
            fprintf(stdout, "Global size: [%zu %zu]\n", global_range[0], global_range[1]);
            fprintf(stdout, "Local size:  [%zu %zu]\n", local_range[0], local_range[1]);
            fprintf(stdout, "Offset size: [%zu %zu]\n", offset_range[0], offset_range[1]);
            fprintf(stdout, "\n");
#endif

            queue.enqueueNDRangeKernel(kernel,
                                       offset_range,
                                       global_range,
                                       local_range,
                                       events,
                                       prof_event);

            // Block until the kernel completes so both counters are valid.
            prof_event->wait();

            prof_event->getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
            prof_event->getProfilingInfo(CL_PROFILING_COMMAND_END, &end);

            // OpenCL profiling counters are in nanoseconds; scale to ms.
            double taken = static_cast<double>(end-start)*1.0e-6;

            kernel_calls.at(func_name) += 1;
            kernel_times.at(func_name) += taken;
        }
        else
        {
            // just launch kernel
            queue.enqueueNDRangeKernel(kernel,
                                       offset_range,
                                       global_range,
                                       local_range,
                                       events,
                                       event);
        }
    }
    catch (const cl::Error& e)
    {
        std::string func_name;
        kernel.getInfo(CL_KERNEL_FUNCTION_NAME, &func_name);

        // invalid work group size (CL_INVALID_WORK_GROUP_SIZE)
        if (e.err() == -54)
        {
            std::stringstream errstr;
            errstr << "Error in enqueueing kernel " << func_name;
            errstr << " at line " << line << " in " << file << std::endl;
            errstr << errToString(e.err()).c_str() << std::endl;

            errstr << "Launched with ";
            errstr << global_range.dimensions() << " global dimensions, ";
            errstr << local_range.dimensions() << " local dimensions." << std::endl;

            for (unsigned int ii = 0; ii < global_range.dimensions(); ii++)
            {
                // The local range may have fewer dimensions than the global
                // one (e.g. cl::NullRange); treat a missing entry as 0.
                const size_t local_sz = (ii < local_range.dimensions())
                    ? local_range[ii]
                    : static_cast<size_t>(0);

                errstr << "Launch dimension " << ii << ": ";
                errstr << "global " << global_range[ii] << ", ";
                errstr << "local " << local_sz << " ";

                // only print this if there is actually an offset
                if (offset_range.dimensions())
                {
                    errstr << "(offset " << offset_range[ii] << ") - ";
                }

                errstr << "(" << global_range[ii] << "%" << local_sz << ") ";

                // Never divide by a zero local size while reporting an error.
                if (local_sz != 0)
                {
                    errstr << "= " << global_range[ii] % local_sz << std::endl;
                }
                else
                {
                    errstr << "= undefined (local size is 0)" << std::endl;
                }
            }

            // The message contains a literal '%', so it must be passed as an
            // argument rather than used as the format string itself.
            DIE("%s", errstr.str().c_str());
        }
        else
        {
            DIE("Error in enqueueing kernel '%s' at line %d in %s\n"
                "Error in %s, code %d (%s) - exiting\n",
                func_name.c_str(), line, file,
                e.what(), e.err(), errToString(e.err()).c_str());
        }
    }
}