예제 #1
0
/**
 * Chooses a local work-group shape for the global range wsDim.
 *
 * For each dimension the candidate sizes come from factor(wsDim[i]), truncated
 * at the device's CL_DEVICE_MAX_WORK_ITEM_SIZES entry for that dimension. The
 * candidates are handed to maximize() together with
 * CL_DEVICE_MAX_WORK_GROUP_SIZE, and its result becomes the returned range.
 * (NOTE(review): factor() is assumed to return its values sorted ascending,
 * which std::upper_bound requires, and maximize() presumably picks one value
 * per dimension whose product stays within the limit -- confirm against their
 * definitions.)
 *
 * Both device limits are queried once and kept in function-local statics.
 * Results are memoized per distinct set of extents.
 *
 * @param wsDim  global work size to find a local size for
 * @return the chosen local range, or cl::NullRange when maximize() yields
 *         anything other than 1, 2 or 3 values (that case is not memoized)
 */
cl::NDRange getBestWorkspaceDim(cl::NDRange wsDim)
{
	static std::vector<size_t> MaxDims = CLContextLoader::getDevice().getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>();

	static size_t totMax = CLContextLoader::getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();

	// The memo is keyed on a copy of the extents rather than on cl::NDRange
	// itself: NDRange provides no operator<, so std::less<cl::NDRange> would
	// go through its conversion to `const size_t*` and order keys by object
	// address, which makes lookups miss and the map grow on every call.
	typedef std::map<std::vector<size_t>, cl::NDRange> memo_map;
	static memo_map memoing;

	const size_t nDims = wsDim.dimensions();
	const size_t* extents = static_cast<const size_t*>(wsDim);
	const std::vector<size_t> key(extents, extents + nDims);

	memo_map::iterator res = memoing.find(key);
	if (res != memoing.end()) return res->second;

	// Per-dimension candidate sizes, each capped at the device's per-dimension limit.
	std::vector<std::vector<size_t> > v(nDims);
	for (size_t i = 0; i < nDims; ++i)
	{
		std::vector<size_t> s = factor(extents[i]);
		s.erase(std::upper_bound(s.begin(), s.end(), MaxDims[i]), s.end());
		v[i].swap(s);
	}

	std::vector<size_t> dims = maximize(v.begin(), v.end(), totMax);

	// An earlier heuristic (removed) clamped each extent to MaxDims and then
	// halved the dimensions round-robin until their product fit within totMax.

	cl::NDRange best = cl::NullRange;
	switch (dims.size())
	{
	case 1: best = cl::NDRange(dims[0]); break;
	case 2: best = cl::NDRange(dims[0], dims[1]); break;
	case 3: best = cl::NDRange(dims[0], dims[1], dims[2]); break;
	default: return cl::NullRange;
	}

	memoing.insert(std::make_pair(key, best));
	return best;
}
예제 #2
0
/**
 * Enqueues `kernel` on `queue` and converts any cl::Error into a fatal DIE()
 * report naming the kernel and the call site.
 *
 * When `profiler_on` is set, the call blocks until the kernel has finished,
 * reads the event's CL_PROFILING_COMMAND_START/END timestamps and adds the
 * elapsed time (scaled by 1e-6; OpenCL reports nanoseconds, so this is
 * milliseconds) to kernel_times[name], and bumps kernel_calls[name]. Both
 * tables are accessed with .at(), so the kernel name must already be present.
 * Otherwise the kernel is simply enqueued without waiting.
 *
 * @param kernel        kernel to launch
 * @param line          call-site line, used only in error messages
 * @param file          call-site file, used only in error messages
 * @param offset_range  global work offset
 * @param global_range  global work size
 * @param local_range   work-group size
 * @param events        wait list forwarded to enqueueNDRangeKernel (may be NULL)
 * @param event         receives the launch event if non-NULL
 */
void CloverChunk::enqueueKernel
(cl::Kernel const& kernel,
 int line, const char* file,
 const cl::NDRange offset_range,
 const cl::NDRange global_range,
 const cl::NDRange local_range,
 const std::vector< cl::Event > * const events,
 cl::Event * const event)
{
    try
    {
        if (profiler_on)
        {
            // Profiling needs an event even when the caller did not ask for
            // one, so fall back to a local one in that case.
            cl::Event fallback_event;
            cl::Event * const prof_event = (event != NULL) ? event : &fallback_event;

            std::string func_name;
            kernel.getInfo(CL_KERNEL_FUNCTION_NAME, &func_name);

            #if 0
            fprintf(stdout, "Enqueueing kernel: %s\n", func_name.c_str());
            fprintf(stdout, "%zu global dimensions\n", global_range.dimensions());
            fprintf(stdout, "%zu local dimensions\n", local_range.dimensions());
            fprintf(stdout, "%zu offset dimensions\n", offset_range.dimensions());
            fprintf(stdout, "Global size: [%zu %zu]\n", global_range[0], global_range[1]);
            fprintf(stdout, "Local size:  [%zu %zu]\n", local_range[0], local_range[1]);
            fprintf(stdout, "Offset size: [%zu %zu]\n", offset_range[0], offset_range[1]);
            fprintf(stdout, "\n");
            #endif

            queue.enqueueNDRangeKernel(kernel,
                                       offset_range,
                                       global_range,
                                       local_range,
                                       events,
                                       prof_event);

            // Block until completion so the profiling timestamps are available.
            prof_event->wait();

            cl_ulong start, end;
            prof_event->getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
            prof_event->getProfilingInfo(CL_PROFILING_COMMAND_END, &end);
            const double taken = static_cast<double>(end-start)*1.0e-6;

            kernel_calls.at(func_name) += 1;
            kernel_times.at(func_name) += taken;
        }
        else
        {
            // just launch kernel
            queue.enqueueNDRangeKernel(kernel,
                                       offset_range,
                                       global_range,
                                       local_range,
                                       events,
                                       event);
        }
    }
    catch (const cl::Error& e)
    {
        std::string func_name;
        kernel.getInfo(CL_KERNEL_FUNCTION_NAME, &func_name);

        if (e.err() == CL_INVALID_WORK_GROUP_SIZE)
        {
            // Spell out the launch geometry, since this error almost always
            // comes down to how global and local sizes relate.
            std::stringstream errstr;
            errstr << "Error in enqueueing kernel " << func_name;
            errstr << " at line " << line << " in " << file << '\n';
            errstr << errToString(e.err()).c_str() << '\n';

            errstr << "Launched with ";
            errstr << global_range.dimensions() << " global dimensions, ";
            errstr << local_range.dimensions() << " local dimensions." << '\n';

            for (unsigned int ii = 0; ii < global_range.dimensions(); ii++)
            {
                // local_range may be cl::NullRange, have fewer dimensions than
                // global_range, or contain a 0; taking the modulo then would
                // divide by zero while we are trying to report the error.
                const bool has_local = ii < local_range.dimensions()
                                       && local_range[ii] != 0;

                errstr << "Launch dimension " << ii << ": ";
                errstr << "global " << global_range[ii] << ", ";
                if (has_local)
                {
                    errstr << "local " << local_range[ii] << " ";
                }
                else
                {
                    errstr << "local (unspecified) ";
                }

                // only print this if there is actually an offset for this dimension
                if (ii < offset_range.dimensions())
                {
                    errstr << "(offset " << offset_range[ii] << ") - ";
                }

                if (has_local)
                {
                    errstr << "(" << global_range[ii] << "%" << local_range[ii] << ") ";
                    errstr << "= " << global_range[ii] % local_range[ii];
                }
                errstr << '\n';
            }

            // DIE is used printf-style (see the other branch); the message
            // contains a literal '%', so it must be passed as an argument and
            // never as the format string itself.
            DIE("%s", errstr.str().c_str());
        }
        else
        {
            DIE("Error in enqueueing kernel '%s' at line %d in %s\n"
                "Error in %s, code %d (%s) - exiting\n",
                 func_name.c_str(), line, file,
                 e.what(), e.err(), errToString(e.err()).c_str());
        }
    }
}