示例#1
0
/*
 * Kernel event
 */
/**
 * Builds the event describing an N-dimensional kernel launch and validates
 * the launch parameters against the queue's device.
 *
 * The base Event is constructed first (state Queued); if it already reported
 * an error through \p errcode_ret nothing else is checked.
 *
 * \param parent                  command queue the event is created for; its
 *                                device and context are queried here
 * \param kernel                  kernel to run, must not be NULL
 * \param work_dim                number of dimensions, in 1..device maximum
 * \param global_work_offset      per-dimension offsets, or NULL for all zeros
 * \param global_work_size        per-dimension global sizes, must not be NULL
 *                                and must not contain a zero entry
 * \param local_work_size         per-dimension work-group sizes, or NULL to let
 *                                the device-dependent kernel guess them
 * \param num_events_in_wait_list forwarded to the Event constructor
 * \param event_wait_list         forwarded to the Event constructor
 * \param errcode_ret             receives CL_SUCCESS or the first error found:
 *                                CL_INVALID_KERNEL,
 *                                CL_INVALID_PROGRAM_EXECUTABLE,
 *                                CL_INVALID_CONTEXT, CL_INVALID_KERNEL_ARGS,
 *                                CL_INVALID_WORK_DIMENSION,
 *                                CL_INVALID_GLOBAL_WORK_SIZE,
 *                                CL_INVALID_WORK_GROUP_SIZE,
 *                                CL_INVALID_WORK_ITEM_SIZE,
 *                                CL_MISALIGNED_SUB_BUFFER_OFFSET,
 *                                CL_INVALID_IMAGE_SIZE, or whatever one of the
 *                                info() queries returned. Must not be NULL.
 */
KernelEvent::KernelEvent(CommandQueue *parent,
                         Kernel *kernel,
                         cl_uint work_dim,
                         const size_t *global_work_offset,
                         const size_t *global_work_size,
                         const size_t *local_work_size,
                         cl_uint num_events_in_wait_list,
                         const Event **event_wait_list,
                         cl_int *errcode_ret)
: Event(parent, Queued, num_events_in_wait_list, event_wait_list, errcode_ret),
  p_work_dim(work_dim), p_kernel(kernel)
{
  // TODO This is where everything else needs to be handled. Need to try to use
  // device specific methods though.
#ifdef DBG_EVENT
  std::cerr << "Entering KernelEvent::KernelEvent\n";
#endif
    // Initialise kernel attributes first so that they hold defined values on
    // every exit path, including the early error returns below.
    // NOTE(review): only 3 entries are cleared, as in the original code;
    // presumably the member arrays hold at least as many entries as the
    // device's maximum dimension count -- confirm against the class declaration.
    for (unsigned i = 0; i < 3; ++i) {
      p_global_work_offset[i] = 0;
      p_global_work_size[i] = 0;
      p_local_work_size[i] = 0;
    }

    // The Event constructor already failed: nothing more to validate.
    if (*errcode_ret != CL_SUCCESS) return;

    // Sanity checks
    if (!kernel)
    {
        *errcode_ret = CL_INVALID_KERNEL;
        return;
    }

    // Check that the kernel was built for parent's device.
    DeviceInterface *device = 0;
    Context *k_ctx = 0, *q_ctx = 0;
    size_t max_work_group_size = 0;
    cl_uint max_dims = 0;

    // Each query is checked on its own: OR-ing several (negative) CL error
    // codes together would produce a value that is not a valid error code.
    *errcode_ret = parent->info(CL_QUEUE_DEVICE, sizeof(DeviceInterface *),
                                &device, 0);

    if (*errcode_ret != CL_SUCCESS)
        return;

    *errcode_ret = parent->info(CL_QUEUE_CONTEXT, sizeof(Context *), &q_ctx, 0);

    if (*errcode_ret != CL_SUCCESS)
        return;

    *errcode_ret = kernel->info(CL_KERNEL_CONTEXT, sizeof(Context *), &k_ctx, 0);

    if (*errcode_ret != CL_SUCCESS)
        return;

    *errcode_ret = device->info(CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t),
                                &max_work_group_size, 0);

    if (*errcode_ret != CL_SUCCESS)
        return;

    // The destination is a cl_uint, so advertise exactly that much room.
    *errcode_ret = device->info(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint),
                                &max_dims, 0);

    if (*errcode_ret != CL_SUCCESS)
        return;

    *errcode_ret = device->info(CL_DEVICE_MAX_WORK_ITEM_SIZES,
                                max_dims * sizeof(size_t), p_max_work_item_sizes, 0);

    if (*errcode_ret != CL_SUCCESS)
        return;

    p_dev_kernel = kernel->deviceDependentKernel(device);
#ifdef DBG_EVENT
    std::cerr << "got deviceDependentKernel\n";
#endif

    if (!p_dev_kernel)
    {
        *errcode_ret = CL_INVALID_PROGRAM_EXECUTABLE;
#ifdef DBG_EVENT
        std::cerr << "ERROR: deviceDependentKernel failed\n";
#endif
        return;
    }

    // Check that contexts match
    if (k_ctx != q_ctx)
    {
#ifdef DBG_EVENT
        std::cerr << "ERROR: contexts don't match!\n";
#endif
        *errcode_ret = CL_INVALID_CONTEXT;
        return;
    }

    // Check args
    if (!kernel->argsSpecified())
    {
#ifdef DBG_EVENT
        std::cerr << "ERROR: kernel args aren't specifed\n";
#endif
        *errcode_ret = CL_INVALID_KERNEL_ARGS;
        return;
    }

    // Check dimension
    if (work_dim == 0 || work_dim > max_dims)
    {
#ifdef DBG_EVENT
        std::cerr << "ERROR: invalid work dimension\n";
#endif
        *errcode_ret = CL_INVALID_WORK_DIMENSION;
        return;
    }

    // Populate work_offset, work_size and local_work_size
    size_t work_group_size = 1;

    for (cl_uint i=0; i<work_dim; ++i)
    {
        p_global_work_offset[i] = global_work_offset ? global_work_offset[i] : 0;

        // Bail out immediately: continuing would dereference a NULL
        // global_work_size, and the error code could be overwritten later on.
        if (!global_work_size || !global_work_size[i])
        {
            *errcode_ret = CL_INVALID_GLOBAL_WORK_SIZE;
            return;
        }
        p_global_work_size[i] = global_work_size[i];

        if (!local_work_size)
        {
            // Guess the best value according to the device
          // TODO Use this call to calculate work item merges.
          // Also try to set the kernel function to be a tailcall(?)
          // so it doesn't have to save the regs
            p_local_work_size[i] =
                p_dev_kernel->guessWorkGroupSize(work_dim, i, global_work_size[i]);

            // TODO: CL_INVALID_WORK_GROUP_SIZE if
            // __attribute__((reqd_work_group_size(X, Y, Z))) is set
        }
        else
        {
            // A zero local size is meaningless and would make the modulo
            // below divide by zero.
            if (local_work_size[i] == 0)
            {
                *errcode_ret = CL_INVALID_WORK_GROUP_SIZE;
                return;
            }

            // Check divisibility
            if ((global_work_size[i] % local_work_size[i]) != 0)
            {
                *errcode_ret = CL_INVALID_WORK_GROUP_SIZE;
                return;
            }

            // Not too big ?
            if (local_work_size[i] > p_max_work_item_sizes[i])
            {
                *errcode_ret = CL_INVALID_WORK_ITEM_SIZE;
                return;
            }

            // TODO: CL_INVALID_WORK_GROUP_SIZE if
            // __attribute__((reqd_work_group_size(X, Y, Z))) doesn't match

            p_local_work_size[i] = local_work_size[i];
            work_group_size *= local_work_size[i];
        }
    }

    // Check we don't ask too much to the device
    // (only explicit local sizes contribute; guessed sizes are not multiplied in)
    if (work_group_size > max_work_group_size)
    {
        *errcode_ret = CL_INVALID_WORK_GROUP_SIZE;
        return;
    }

    // Check arguments (buffer alignment, image size, ...)
    for (unsigned int i=0; i<kernel->numArgs(); ++i)
    {
#ifdef DBG_EVENT
      std::cerr << "Checking argument " << i << std::endl;
#endif
        const Kernel::Arg &a = kernel->arg(i);

        // Arguments whose file() is Local are not checked here
        if (a.file() == Kernel::Arg::Local)
          continue;

        if (a.kind() == Kernel::Arg::Buffer)
        {
#ifdef DBG_EVENT
          std::cerr << "Arg is a buffer\n";
#endif
            const MemObject *buffer = *(const MemObject **)(a.value(0));

            if (!BufferEvent::isSubBufferAligned(buffer, device))
            {
                *errcode_ret = CL_MISALIGNED_SUB_BUFFER_OFFSET;
                return;
            }
        }
        else if (a.kind() == Kernel::Arg::Image2D)
        {
            const Image2D *image = *(const Image2D **)(a.value(0));
            size_t maxWidth = 0, maxHeight = 0;

            *errcode_ret = device->info(CL_DEVICE_IMAGE2D_MAX_WIDTH,
                                        sizeof(size_t), &maxWidth, 0);

            if (*errcode_ret != CL_SUCCESS)
                return;

            *errcode_ret = device->info(CL_DEVICE_IMAGE2D_MAX_HEIGHT,
                                        sizeof(size_t), &maxHeight, 0);

            if (*errcode_ret != CL_SUCCESS)
                return;

            if (image->width() > maxWidth || image->height() > maxHeight)
            {
                *errcode_ret = CL_INVALID_IMAGE_SIZE;
                return;
            }
        }
        else if (a.kind() == Kernel::Arg::Image3D)
        {
            const Image3D *image = *(const Image3D **)a.value(0);
            size_t maxWidth = 0, maxHeight = 0, maxDepth = 0;

            *errcode_ret = device->info(CL_DEVICE_IMAGE3D_MAX_WIDTH,
                                        sizeof(size_t), &maxWidth, 0);

            if (*errcode_ret != CL_SUCCESS)
                return;

            *errcode_ret = device->info(CL_DEVICE_IMAGE3D_MAX_HEIGHT,
                                        sizeof(size_t), &maxHeight, 0);

            if (*errcode_ret != CL_SUCCESS)
                return;

            *errcode_ret = device->info(CL_DEVICE_IMAGE3D_MAX_DEPTH,
                                        sizeof(size_t), &maxDepth, 0);

            if (*errcode_ret != CL_SUCCESS)
                return;

            if (image->width() > maxWidth || image->height() > maxHeight ||
                image->depth() > maxDepth)
            {
                *errcode_ret = CL_INVALID_IMAGE_SIZE;
                return;
            }
        }
    }
#ifdef DBG_EVENT
    std::cerr << "Leaving KernelEvent::KernelEvent\n";
#endif
}
示例#2
0
/*
 * Native kernel
 */
/**
 * Builds the event describing the execution of a host ("native") function.
 *
 * The argument block \p args is copied into a buffer owned by the event
 * (p_args). Every location listed in \p args_mem_loc is then patched, in the
 * copy, with the native global pointer of the matching entry of \p mem_list.
 *
 * \param parent                  command queue the event is created for; its
 *                                device must report CL_EXEC_NATIVE_KERNEL
 * \param user_func               host function to call, must not be NULL
 * \param args                    argument block handed to \p user_func, may be
 *                                NULL only if \p cb_args and
 *                                \p num_mem_objects are both 0
 * \param cb_args                 size in bytes of \p args
 * \param num_mem_objects         number of entries in \p mem_list and
 *                                \p args_mem_loc
 * \param mem_list                memory objects referenced by \p args
 * \param args_mem_loc            for each memory object, the address inside
 *                                \p args where its pointer has to be stored
 * \param num_events_in_wait_list forwarded to the Event constructor
 * \param event_wait_list         forwarded to the Event constructor
 * \param errcode_ret             receives CL_SUCCESS, CL_INVALID_VALUE,
 *                                CL_INVALID_OPERATION, CL_INVALID_MEM_OBJECT,
 *                                CL_OUT_OF_HOST_MEMORY or the error returned by
 *                                an info() query. Must not be NULL.
 */
NativeKernelEvent::NativeKernelEvent(CommandQueue *parent,
                                     void (*user_func)(void *),
                                     void *args,
                                     size_t cb_args,
                                     cl_uint num_mem_objects,
                                     const MemObject **mem_list,
                                     const void **args_mem_loc,
                                     cl_uint num_events_in_wait_list,
                                     const Event **event_wait_list,
                                     cl_int *errcode_ret)
: Event (parent, Queued, num_events_in_wait_list, event_wait_list, errcode_ret),
  p_user_func((void *)user_func), p_args(0)
{
    // The Event constructor already failed: nothing more to validate.
    if (*errcode_ret != CL_SUCCESS) return;

    // Parameters sanity
    if (!user_func)
    {
        *errcode_ret = CL_INVALID_VALUE;
        return;
    }

    if (!args && (cb_args || num_mem_objects))
    {
        *errcode_ret = CL_INVALID_VALUE;
        return;
    }

    if (args && !cb_args)
    {
        *errcode_ret = CL_INVALID_VALUE;
        return;
    }

    if (num_mem_objects && (!mem_list || !args_mem_loc))
    {
        *errcode_ret = CL_INVALID_VALUE;
        return;
    }

    if (!num_mem_objects && (mem_list || args_mem_loc))
    {
        *errcode_ret = CL_INVALID_VALUE;
        return;
    }

    // Check that the device can execute a native kernel
    DeviceInterface *device = 0;
    cl_device_exec_capabilities caps = 0;

    *errcode_ret = parent->info(CL_QUEUE_DEVICE, sizeof(DeviceInterface *),
                                &device, 0);

    if (*errcode_ret != CL_SUCCESS)
        return;

    *errcode_ret = device->info(CL_DEVICE_EXECUTION_CAPABILITIES,
                                sizeof(cl_device_exec_capabilities), &caps, 0);

    if (*errcode_ret != CL_SUCCESS)
        return;

    if ((caps & CL_EXEC_NATIVE_KERNEL) == 0)
    {
        *errcode_ret = CL_INVALID_OPERATION;
        return;
    }

    // Copy the arguments in a new list.
    // The checks above guarantee that num_mem_objects != 0 implies
    // args != NULL and cb_args != 0, so no relocation is skipped by this test.
    if (cb_args)
    {
        const char *args_begin = (const char *)args;

        // Validate every relocation before allocating anything, so that a bad
        // entry neither leaves a half-patched copy behind nor makes us write
        // a pointer outside of the copied block.
        for (cl_uint i=0; i<num_mem_objects; ++i)
        {
            const char *loc = (const char *)args_mem_loc[i];

            if (!mem_list[i])
            {
                *errcode_ret = CL_INVALID_MEM_OBJECT;
                return;
            }

            // A whole pointer must fit between loc and the end of args
            if (cb_args < sizeof(void *) || !loc || loc < args_begin ||
                (size_t)(loc - args_begin) > cb_args - sizeof(void *))
            {
                *errcode_ret = CL_INVALID_VALUE;
                return;
            }
        }

        p_args = std::malloc(cb_args);

        if (!p_args)
        {
            *errcode_ret = CL_OUT_OF_HOST_MEMORY;
            return;
        }

        std::memcpy((void *)p_args, (void *)args, cb_args);

        // Replace memory objects with global pointers
        for (cl_uint i=0; i<num_mem_objects; ++i)
        {
            // We need to do relocation : the location is given inside args,
            // we need the same offset inside p_args
            size_t offset = (size_t)((const char *)args_mem_loc[i] - args_begin);

            void *global_ptr =
                mem_list[i]->deviceBuffer(device)->nativeGlobalPointer();

            // memcpy instead of a typed store: the slot is not guaranteed to
            // be suitably aligned for a pointer.
            std::memcpy((char *)p_args + offset, &global_ptr, sizeof(global_ptr));
        }
    }
}