Example #1
0
// Builds the "Finalise" kernel for this device, binds its numbered
// arguments 1 through 7 and, if the kernel accepts them, configures a
// two-dimensional work range.
//
// linear: passed through unchanged as numbered argument 3.
//
// Returns FILTER_OK once the work sizes have been configured, or
// FILTER_KERNEL_ARGUMENT_ERROR when arguments_valid() reports failure
// (no work sizes are configured in that case).
//
// NOTE(review): numbered argument 0 is not bound in this function;
// presumably it is supplied elsewhere - confirm.
result MultiFrame::InitSortKernel(const int &linear) {

    // Constant kernel inputs; they only need to outlive the calls below.
    const cl_int2 origin = {0, 0};
    const int fixed_alpha_size = 16;

    sort_ = ClKernel(device_id_, "Finalise");

    // Scalar arguments.
    sort_.SetNumberedArg(1, sizeof(int), &region_width_);
    sort_.SetNumberedArg(2, sizeof(cl_int2), &origin);
    sort_.SetNumberedArg(3, sizeof(int), &linear);
    sort_.SetNumberedArg(4, sizeof(int), &fixed_alpha_size);
    sort_.SetNumberedArg(5, sizeof(int), &alpha_set_size_);

    // Buffer arguments, looked up in this device's buffer table.
    sort_.SetNumberedArg(
        6, sizeof(cl_mem), g_devices[device_id_].buffers_.ptr(alpha_));
    sort_.SetNumberedArg(
        7, sizeof(cl_mem), g_devices[device_id_].buffers_.ptr(dest_plane_));

    // Bail out before touching the work sizes if any argument was rejected.
    if (!sort_.arguments_valid()) {
        return FILTER_KERNEL_ARGUMENT_ERROR;
    }

    const size_t local_size[2] = {8, 16};
    // The global height is region_height_ scaled by 8 (<< 3) to offset the
    // fact that 8 work items collaborate on one pixel.
    const size_t global_size[2] = {region_width_, region_height_ << 3};
    const size_t item_size[2] = {1, 1};

    sort_.set_work_dim(2);
    sort_.set_local_work_size(local_size);
    sort_.set_scalar_global_size(global_size);
    sort_.set_scalar_item_size(item_size);

    return FILTER_OK;
}
Example #2
0
// Builds the "NLMMultiFrameFourPixel" kernel for this device, binds the
// arguments identified by the FILTER_ARG_* indices below and, if the kernel
// accepts them, configures a two-dimensional work range.
//
// sample_expand: bound as FILTER_ARG_SAMPLE_EXPAND.
// linear:        bound as FILTER_ARG_LINEAR.
// correction:    accepted but not read anywhere in this function.
// balanced:      accepted but not read anywhere in this function.
//
// Returns FILTER_OK once the work sizes have been configured, or
// FILTER_KERNEL_ARGUMENT_ERROR when arguments_valid() reports failure
// (no work sizes are configured in that case).
result MultiFrame::InitFilterKernel(
    const int &sample_expand,
    const int &linear,
    const int &correction,
    const int &balanced) {

    filter_ = ClKernel(device_id_, "NLMMultiFrameFourPixel");

    // Frame geometry and filter strength.
    filter_.SetNumberedArg(FILTER_ARG_WIDTH, sizeof(int), &width_);
    filter_.SetNumberedArg(FILTER_ARG_HEIGHT, sizeof(int), &height_);
    filter_.SetNumberedArg(FILTER_ARG_H, sizeof(float), &h_);
    filter_.SetNumberedArg(FILTER_ARG_SAMPLE_EXPAND, sizeof(int), &sample_expand);

    // Gaussian buffer, looked up in this device's buffer table.
    filter_.SetNumberedArg(
        FILTER_ARG_G_GAUSSIAN,
        sizeof(cl_mem),
        g_devices[device_id_].buffers_.ptr(g_gaussian));

    filter_.SetNumberedArg(FILTER_ARG_LINEAR, sizeof(int), &linear);
    filter_.SetNumberedArg(FILTER_ARG_ALPHA_SET_SIZE, sizeof(int), &alpha_set_size_);

    // Alpha buffer, looked up in this device's buffer table.
    filter_.SetNumberedArg(
        FILTER_ARG_REGION_ALPHA,
        sizeof(cl_mem),
        g_devices[device_id_].buffers_.ptr(alpha_));

    // Bail out before touching the work sizes if any argument was rejected.
    if (!filter_.arguments_valid()) {
        return FILTER_KERNEL_ARGUMENT_ERROR;
    }

    const size_t local_size[2] = {8, 16};
    // The global height is region_height_ scaled by 8 (<< 3) to offset the
    // fact that 8 work items collaborate on one pixel.
    const size_t global_size[2] = {region_width_, region_height_ << 3};
    const size_t item_size[2] = {1, 1};

    filter_.set_work_dim(2);
    filter_.set_local_work_size(local_size);
    filter_.set_scalar_global_size(global_size);
    filter_.set_scalar_item_size(item_size);

    return FILTER_OK;
}
// Records the vertex and varying buffers as the current ones, calls Map()
// on each buffer that is present, and points _clKernel at the registry
// entry matching the two element counts, creating and compiling a new
// entry when none matches.
//
// vertex:  may be null; the vertex element count is then 0.
// varying: may be null; the varying element count is then 0.
void
OsdClKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) {

    // dynamic_cast of a null pointer yields a null pointer, so a missing
    // buffer needs no separate branch.
    _currentVertexBuffer = dynamic_cast<OsdClVertexBuffer *>(vertex);
    _currentVaryingBuffer = dynamic_cast<OsdClVertexBuffer *>(varying);

    // Element counts are read from the incoming pointers, not from the
    // cast results.
    int numVertexElements = 0;
    if (vertex) {
        numVertexElements = vertex->GetNumElements();
    }
    int numVaryingElements = 0;
    if (varying) {
        numVaryingElements = varying->GetNumElements();
    }

    if (_currentVertexBuffer)
        _currentVertexBuffer->Map();
    if (_currentVaryingBuffer)
        _currentVaryingBuffer->Map();

    // Look for an already registered kernel with these element counts.
    const ClKernel::Match wanted(numVertexElements, numVaryingElements);
    std::vector<ClKernel>::iterator found =
        std::find_if(kernelRegistry.begin(), kernelRegistry.end(), wanted);

    if (found == kernelRegistry.end()) {
        // No match: append a fresh entry and compile it for these counts.
        // NOTE(review): push_back may reallocate the registry, which would
        // invalidate element addresses held elsewhere - confirm no other
        // pointer into kernelRegistry outlives this call.
        kernelRegistry.push_back(ClKernel());
        _clKernel = &kernelRegistry.back();
        _clKernel->Compile(_clContext, numVertexElements, numVaryingElements);
        return;
    }

    _clKernel = &(*found);
}