/* * Kernel event */ KernelEvent::KernelEvent(CommandQueue *parent, Kernel *kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, cl_uint num_events_in_wait_list, const Event **event_wait_list, cl_int *errcode_ret) : Event(parent, Queued, num_events_in_wait_list, event_wait_list, errcode_ret), p_work_dim(work_dim), p_kernel(kernel) { // TODO This is where everything else needs to be handled. Need to try to use // device specific methods though. #ifdef DBG_EVENT std::cerr << "Entering KernelEvent::KernelEvent\n"; #endif if (*errcode_ret != CL_SUCCESS) return; *errcode_ret = CL_SUCCESS; // Sanity checks if (!kernel) { *errcode_ret = CL_INVALID_KERNEL; return; } // Check that the kernel was built for parent's device. DeviceInterface *device; Context *k_ctx, *q_ctx; size_t max_work_group_size; cl_uint max_dims = 0; *errcode_ret = parent->info(CL_QUEUE_DEVICE, sizeof(DeviceInterface *), &device, 0); if (*errcode_ret != CL_SUCCESS) return; *errcode_ret = parent->info(CL_QUEUE_CONTEXT, sizeof(Context *), &q_ctx, 0); *errcode_ret |= kernel->info(CL_KERNEL_CONTEXT, sizeof(Context *), &k_ctx, 0); *errcode_ret |= device->info(CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group_size, 0); *errcode_ret |= device->info(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(size_t), &max_dims, 0); *errcode_ret |= device->info(CL_DEVICE_MAX_WORK_ITEM_SIZES, max_dims * sizeof(size_t), p_max_work_item_sizes, 0); if (*errcode_ret != CL_SUCCESS) return; p_dev_kernel = kernel->deviceDependentKernel(device); #ifdef DBG_EVENT std::cerr << "got deviceDependentKernel\n"; #endif if (!p_dev_kernel) { *errcode_ret = CL_INVALID_PROGRAM_EXECUTABLE; #ifdef DBG_EVENT std::cerr << "ERROR: deviceDependentKernel failed\n"; #endif return; } // Check that contexts match if (k_ctx != q_ctx) { #ifdef DBG_EVENT std::cerr << "ERROR: contexts don't match!\n"; #endif *errcode_ret = CL_INVALID_CONTEXT; return; } // Check args if 
(!kernel->argsSpecified()) { #ifdef DBG_EVENT std::cerr << "ERROR: kernel args aren't specifed\n"; #endif *errcode_ret = CL_INVALID_KERNEL_ARGS; return; } // Check dimension if (work_dim == 0 || work_dim > max_dims) { #ifdef DBG_EVENT std::cerr << "ERROR: invalid work dimension\n"; #endif *errcode_ret = CL_INVALID_WORK_DIMENSION; return; } // Initialise kernel attributes for (unsigned i = 0; i < 3; ++i) { p_global_work_offset[i] = 0; p_global_work_size[i] = 0; p_local_work_size[i] = 0; } // Populate work_offset, work_size and local_work_size size_t work_group_size = 1; for (cl_uint i=0; i<work_dim; ++i) { if (global_work_offset) { p_global_work_offset[i] = global_work_offset[i]; } else { p_global_work_offset[i] = 0; } if (!global_work_size || !global_work_size[i]) { *errcode_ret = CL_INVALID_GLOBAL_WORK_SIZE; } p_global_work_size[i] = global_work_size[i]; if (!local_work_size) { // Guess the best value according to the device // TODO Use this call to calculate work item merges. // Also try to set the kernel function to be a tailcall(?) // so it doesn't have to save the regs p_local_work_size[i] = p_dev_kernel->guessWorkGroupSize(work_dim, i, global_work_size[i]); // TODO: CL_INVALID_WORK_GROUP_SIZE if // __attribute__((reqd_work_group_size(X, Y, Z))) is set } else { // Check divisibility if ((global_work_size[i] % local_work_size[i]) != 0) { *errcode_ret = CL_INVALID_WORK_GROUP_SIZE; return; } // Not too big ? if (local_work_size[i] > p_max_work_item_sizes[i]) { *errcode_ret = CL_INVALID_WORK_ITEM_SIZE; return; } // TODO: CL_INVALID_WORK_GROUP_SIZE if // __attribute__((reqd_work_group_size(X, Y, Z))) doesn't match p_local_work_size[i] = local_work_size[i]; work_group_size *= local_work_size[i]; } } // Check we don't ask too much to the device if (work_group_size > max_work_group_size) { *errcode_ret = CL_INVALID_WORK_GROUP_SIZE; return; } // Check arguments (buffer alignment, image size, ...) 
for (unsigned int i=0; i<kernel->numArgs(); ++i) { #ifdef DBG_EVENT std::cerr << "Checking argument " << i << std::endl; #endif const Kernel::Arg &a = kernel->arg(i); if (a.file() == Kernel::Arg::Local) continue; if (a.kind() == Kernel::Arg::Buffer) { #ifdef DBG_EVENT std::cerr << "Arg is a buffer\n"; #endif const MemObject *buffer = *(const MemObject **)(a.value(0)); if (!BufferEvent::isSubBufferAligned(buffer, device)) { *errcode_ret = CL_MISALIGNED_SUB_BUFFER_OFFSET; return; } } else if (a.kind() == Kernel::Arg::Image2D) { const Image2D *image = *(const Image2D **)(a.value(0)); size_t maxWidth, maxHeight; *errcode_ret = device->info(CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &maxWidth, 0); *errcode_ret |= device->info(CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &maxHeight, 0); if (*errcode_ret != CL_SUCCESS) return; if (image->width() > maxWidth || image->height() > maxHeight) { *errcode_ret = CL_INVALID_IMAGE_SIZE; return; } } else if (a.kind() == Kernel::Arg::Image3D) { const Image3D *image = *(const Image3D **)a.value(0); size_t maxWidth, maxHeight, maxDepth; *errcode_ret = device->info(CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &maxWidth, 0); *errcode_ret |= device->info(CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &maxHeight, 0); *errcode_ret |= device->info(CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &maxDepth, 0); if (*errcode_ret != CL_SUCCESS) return; if (image->width() > maxWidth || image->height() > maxHeight || image->depth() > maxDepth) { *errcode_ret = CL_INVALID_IMAGE_SIZE; return; } } } #ifdef DBG_EVENT std::cerr << "Leaving KernelEvent::KernelEvent\n"; #endif }
/*
 * Native kernel
 */

/**
 * Builds the event describing the execution of a native (host) function.
 *
 * The base Event is constructed in the Queued state. The argument block
 * \p args is copied into a malloc'ed buffer stored in p_args, and every
 * location listed in \p args_mem_loc is overwritten, in the copy, with the
 * pointer returned by the matching memory object's
 * deviceBuffer(device)->nativeGlobalPointer().
 *
 * \param parent                  command queue the event is enqueued on
 * \param user_func               host function to call; must not be NULL
 * \param args                    argument block handed to \p user_func
 * \param cb_args                 size in bytes of \p args
 * \param num_mem_objects         number of entries in \p mem_list and
 *                                \p args_mem_loc
 * \param mem_list                memory objects referenced from \p args
 * \param args_mem_loc            for each memory object, the address inside
 *                                \p args where its pointer must be stored
 * \param num_events_in_wait_list forwarded to the Event constructor
 * \param event_wait_list         forwarded to the Event constructor
 * \param errcode_ret             receives CL_SUCCESS or the first error found:
 *                                CL_INVALID_VALUE, CL_INVALID_OPERATION,
 *                                CL_OUT_OF_HOST_MEMORY, CL_INVALID_MEM_OBJECT,
 *                                or whatever an info() query returned. Must
 *                                not be NULL (it is dereferenced
 *                                unconditionally).
 */
NativeKernelEvent::NativeKernelEvent(CommandQueue *parent,
                                     void (*user_func)(void *),
                                     void *args,
                                     size_t cb_args,
                                     cl_uint num_mem_objects,
                                     const MemObject **mem_list,
                                     const void **args_mem_loc,
                                     cl_uint num_events_in_wait_list,
                                     const Event **event_wait_list,
                                     cl_int *errcode_ret)
: Event (parent, Queued, num_events_in_wait_list, event_wait_list, errcode_ret),
  p_user_func((void *)user_func), p_args(0)
{
    // The base class constructor may already have failed
    if (*errcode_ret != CL_SUCCESS)
        return;

    // Parameters sanity
    if (!user_func)
    {
        *errcode_ret = CL_INVALID_VALUE;
        return;
    }

    if (!args && (cb_args || num_mem_objects))
    {
        *errcode_ret = CL_INVALID_VALUE;
        return;
    }

    if (args && !cb_args)
    {
        *errcode_ret = CL_INVALID_VALUE;
        return;
    }

    if (num_mem_objects && (!mem_list || !args_mem_loc))
    {
        *errcode_ret = CL_INVALID_VALUE;
        return;
    }

    if (!num_mem_objects && (mem_list || args_mem_loc))
    {
        *errcode_ret = CL_INVALID_VALUE;
        return;
    }

    // Check that the device can execute a native kernel
    DeviceInterface *device;
    cl_device_exec_capabilities caps;

    *errcode_ret = parent->info(CL_QUEUE_DEVICE, sizeof(DeviceInterface *),
                                &device, 0);
    if (*errcode_ret != CL_SUCCESS)
        return;

    *errcode_ret = device->info(CL_DEVICE_EXECUTION_CAPABILITIES,
                                sizeof(cl_device_exec_capabilities), &caps, 0);
    if (*errcode_ret != CL_SUCCESS)
        return;

    if ((caps & CL_EXEC_NATIVE_KERNEL) == 0)
    {
        *errcode_ret = CL_INVALID_OPERATION;
        return;
    }

    // Copy the arguments in a new list
    if (cb_args)
    {
        p_args = std::malloc(cb_args);

        if (!p_args)
        {
            *errcode_ret = CL_OUT_OF_HOST_MEMORY;
            return;
        }

        // NOTE(review): on the error returns below p_args stays allocated;
        // presumably the destructor releases it -- confirm.
        std::memcpy((void *)p_args, (void *)args, cb_args);

        const char *args_begin = (const char *)args;
        const char *args_end = args_begin + cb_args;
        char *copy_begin = (char *)p_args;

        // Replace memory objects with global pointers
        for (cl_uint i = 0; i < num_mem_objects; ++i)
        {
            const MemObject *buffer = mem_list[i];
            const char *loc = (const char *)args_mem_loc[i];

            if (!buffer)
            {
                *errcode_ret = CL_INVALID_MEM_OBJECT;
                return;
            }

            // The location must designate a pointer-sized slot lying entirely
            // inside the caller's argument block, otherwise the store below
            // would land outside of our copy.
            if (!loc || loc < args_begin || loc >= args_end)
            {
                *errcode_ret = CL_INVALID_VALUE;
                return;
            }

            // We need to do relocation : loc is in args, we need it in p_args.
            // Work with an offset inside args rather than subtracting pointers
            // that belong to two unrelated allocations.
            const size_t offset = (size_t)(loc - args_begin);

            if (cb_args - offset < sizeof(void *))
            {
                *errcode_ret = CL_INVALID_VALUE;
                return;
            }

            // memcpy: the slot is not guaranteed to be aligned for a void*.
            void *global_ptr = buffer->deviceBuffer(device)->nativeGlobalPointer();
            std::memcpy(copy_begin + offset, &global_ptr, sizeof(void *));
        }
    }
}