// Creates the "Finalise" kernel for this device and binds its fixed arguments
// (indices 1..7), then configures a 2-D launch geometry.
//
// linear: forwarded unchanged to the kernel as argument 3.
//
// Returns FILTER_OK when the kernel reports all arguments valid, otherwise
// FILTER_KERNEL_ARGUMENT_ERROR (in which case no work sizes are configured).
//
// NOTE(review): argument index 0 is not set here - presumably it is bound
// elsewhere before the kernel is executed; confirm against the caller.
// NOTE(review): addresses of locals (alpha_size, top_left) and of the
// `linear` reference are handed to SetNumberedArg; this assumes the value is
// copied during the call (as clSetKernelArg does) - confirm.
result MultiFrame::InitSortKernel(const int &linear) {
    sort_ = ClKernel(device_id_, "Finalise");

    const int alpha_size = 16;
    const cl_int2 top_left = {0, 0};

    // The address-of operator on region_width_ had been corrupted into a
    // registered-trademark glyph by an HTML-entity mix-up; restored here.
    sort_.SetNumberedArg(1, sizeof(int), &region_width_);
    sort_.SetNumberedArg(2, sizeof(cl_int2), &top_left);
    sort_.SetNumberedArg(3, sizeof(int), &linear);
    sort_.SetNumberedArg(4, sizeof(int), &alpha_size);
    sort_.SetNumberedArg(5, sizeof(int), &alpha_set_size_);
    sort_.SetNumberedArg(6, sizeof(cl_mem), g_devices[device_id_].buffers_.ptr(alpha_));
    sort_.SetNumberedArg(7, sizeof(cl_mem), g_devices[device_id_].buffers_.ptr(dest_plane_));

    if (!sort_.arguments_valid()) {
        return FILTER_KERNEL_ARGUMENT_ERROR;
    }

    sort_.set_work_dim(2);

    const size_t set_local_work_size[2] = {8, 16};
    // height is increased to offset the fact that 8 work items collaborate on one pixel
    // Explicit casts: brace-initialising size_t from a signed value is a
    // narrowing conversion, which list-initialisation does not permit.
    const size_t set_scalar_global_size[2] = {
        static_cast<size_t>(region_width_),
        static_cast<size_t>(region_height_ << 3)};
    const size_t set_scalar_item_size[2] = {1, 1};

    sort_.set_local_work_size(set_local_work_size);
    sort_.set_scalar_global_size(set_scalar_global_size);
    sort_.set_scalar_item_size(set_scalar_item_size);

    return FILTER_OK;
}
// Creates the "NLMMultiFrameFourPixel" kernel for this device, binds its
// fixed arguments and configures a 2-D launch geometry.
//
// sample_expand: forwarded to the kernel as FILTER_ARG_SAMPLE_EXPAND.
// linear:        forwarded to the kernel as FILTER_ARG_LINEAR.
// correction:    not referenced by this function.
// balanced:      not referenced by this function.
//
// Returns FILTER_OK when the kernel reports all arguments valid, otherwise
// FILTER_KERNEL_ARGUMENT_ERROR (in which case no work sizes are configured).
//
// NOTE(review): addresses of the reference parameters are handed to
// SetNumberedArg; this assumes the value is copied during the call (as
// clSetKernelArg does) - confirm.
result MultiFrame::InitFilterKernel(
    const int &sample_expand,
    const int &linear,
    const int &correction,
    const int &balanced) {
    filter_ = ClKernel(device_id_, "NLMMultiFrameFourPixel");

    filter_.SetNumberedArg(FILTER_ARG_WIDTH, sizeof(int), &width_);
    filter_.SetNumberedArg(FILTER_ARG_HEIGHT, sizeof(int), &height_);
    filter_.SetNumberedArg(FILTER_ARG_H, sizeof(float), &h_);
    filter_.SetNumberedArg(FILTER_ARG_SAMPLE_EXPAND, sizeof(int), &sample_expand);
    filter_.SetNumberedArg(FILTER_ARG_G_GAUSSIAN, sizeof(cl_mem), g_devices[device_id_].buffers_.ptr(g_gaussian));
    filter_.SetNumberedArg(FILTER_ARG_LINEAR, sizeof(int), &linear);
    filter_.SetNumberedArg(FILTER_ARG_ALPHA_SET_SIZE, sizeof(int), &alpha_set_size_);
    filter_.SetNumberedArg(FILTER_ARG_REGION_ALPHA, sizeof(cl_mem), g_devices[device_id_].buffers_.ptr(alpha_));

    if (!filter_.arguments_valid()) {
        return FILTER_KERNEL_ARGUMENT_ERROR;
    }

    filter_.set_work_dim(2);

    const size_t set_local_work_size[2] = {8, 16};
    // height is increased to offset the fact that 8 work items collaborate on one pixel
    // Explicit casts: if the region members are signed (TODO confirm),
    // brace-initialising size_t from them is a narrowing conversion, which
    // list-initialisation does not permit.
    const size_t set_scalar_global_size[2] = {
        static_cast<size_t>(region_width_),
        static_cast<size_t>(region_height_ << 3)};
    const size_t set_scalar_item_size[2] = {1, 1};

    filter_.set_local_work_size(set_local_work_size);
    filter_.set_scalar_global_size(set_scalar_global_size);
    filter_.set_scalar_item_size(set_scalar_item_size);

    return FILTER_OK;
}
// Records the given vertex / varying buffers as the current ones, maps each
// buffer that is present, and selects the ClKernel whose element counts match
// them, compiling and registering a new kernel when none matches.
//
// vertex, varying: either may be null. A buffer that is not an
// OsdClVertexBuffer leaves the corresponding current-buffer pointer null,
// while its element count is still taken from the base-class pointer.
//
// NOTE(review): _clKernel points at an element of kernelRegistry (a
// std::vector); the push_back below may reallocate, which would invalidate
// any pointers to its elements held elsewhere - confirm nothing else keeps one.
void OsdClKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) {
    // dynamic_cast on a null pointer yields null, so no separate null branch
    // is needed to reset the current buffers.
    _currentVertexBuffer = dynamic_cast<OsdClVertexBuffer *>(vertex);
    _currentVaryingBuffer = dynamic_cast<OsdClVertexBuffer *>(varying);

    int numVertexElements = 0;
    if (vertex) {
        numVertexElements = vertex->GetNumElements();
    }

    int numVaryingElements = 0;
    if (varying) {
        numVaryingElements = varying->GetNumElements();
    }

    if (_currentVertexBuffer) {
        _currentVertexBuffer->Map();
    }
    if (_currentVaryingBuffer) {
        _currentVaryingBuffer->Map();
    }

    // find cl kernel from registry (create it if needed)
    const std::vector<ClKernel>::iterator found =
        std::find_if(kernelRegistry.begin(), kernelRegistry.end(),
                     ClKernel::Match(numVertexElements, numVaryingElements));

    if (found != kernelRegistry.end()) {
        _clKernel = &(*found);
        return;
    }

    // No match: append a fresh kernel and compile it for these element counts.
    kernelRegistry.push_back(ClKernel());
    _clKernel = &kernelRegistry.back();
    _clKernel->Compile(_clContext, numVertexElements, numVaryingElements);
}