/**
 * Compile an OpenCL source file into a binary by running the pocl build
 * script, or by delegating to the device driver's own build hook.
 *
 * The build script is looked up in this order:
 *   1. the in-tree script, when the POCL_BUILDING environment variable is set;
 *   2. the installed script under PKGDATADIR, when it is executable;
 *   3. the bare script name otherwise.
 *
 * @param device            Device to build for; its llvm_target_triplet (when
 *                          non-NULL) is passed to the script with -t.
 * @param source_file_name  Path of the source file handed to the script.
 * @param binary_file_name  Path of the output file (script option -o).
 * @param device_tmpdir     Forwarded unchanged to device->build_program.
 * @param user_options      Build options exported to the script through the
 *                          USER_OPTIONS variable; NULL is treated as "".
 *
 * @return CL_OUT_OF_HOST_MEMORY when the command line cannot be formatted or
 *         does not fit in COMMAND_LENGTH bytes; otherwise the value returned
 *         by device->build_program or by system().
 *
 * NOTE(review): user_options and the file names are interpolated into a shell
 * command without escaping. Callers must not pass untrusted strings here.
 */
int call_pocl_build( cl_device_id device,
                     const char* source_file_name,
                     const char* binary_file_name,
                     const char* device_tmpdir,
                     const char* user_options )
{
  int error;
  const char *pocl_build_script;
  /* Passing NULL to a "%s" conversion is undefined behaviour. */
  const char *shell_options = (user_options != NULL) ? user_options : "";
  char command[COMMAND_LENGTH];

  if (getenv("POCL_BUILDING") != NULL)
    pocl_build_script = BUILDDIR "/scripts/" POCL_BUILD;
  else if (access(PKGDATADIR "/" POCL_BUILD, X_OK) == 0)
    pocl_build_script = PKGDATADIR "/" POCL_BUILD;
  else
    pocl_build_script = POCL_BUILD;

  if (device->llvm_target_triplet != NULL)
    {
      error = snprintf(command, sizeof(command),
                       "USER_OPTIONS=\"%s\" %s -t %s -o %s %s",
                       shell_options,
                       pocl_build_script,
                       device->llvm_target_triplet,
                       binary_file_name, source_file_name);
    }
  else
    {
      error = snprintf(command, sizeof(command),
                       "USER_OPTIONS=\"%s\" %s -o %s %s",
                       shell_options,
                       pocl_build_script,
                       binary_file_name, source_file_name);
    }

  /* snprintf returns the length it WOULD have written; a value that does not
     fit means the command was truncated and must not be executed. */
  if (error < 0 || (size_t) error >= sizeof(command))
    return CL_OUT_OF_HOST_MEMORY;

  /* call the customized build command, if needed for the device driver */
  if (device->build_program != NULL)
    {
      error = device->build_program (device->data,
                                     source_file_name, binary_file_name,
                                     command, user_options, device_tmpdir);
    }
  else
    {
      error = system(command);
    }

  return error;
}
// Device query entry point: validates the handle's object type and forwards
// the request to the device interface's info() method.
// Returns CL_INVALID_DEVICE when the handle is not a T_Device object,
// otherwise whatever info() returns.
cl_int clGetDeviceInfo(cl_device_id device,
                       cl_device_info param_name,
                       size_t param_value_size,
                       void * param_value,
                       size_t * param_value_size_ret)
{
#ifdef DBG_API
    std::cerr << "clGetDeviceInfo\n";
#endif

    if (device->isA(Coal::Object::T_Device))
    {
        Coal::DeviceInterface *target = (Coal::DeviceInterface *)device;

        return target->info(param_name, param_value_size, param_value,
                            param_value_size_ret);
    }

    return CL_INVALID_DEVICE;
}
// Command Queue APIs cl_command_queue clCreateCommandQueue(cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_int * errcode_ret) { cl_int default_errcode_ret; // No errcode_ret ? if (!errcode_ret) errcode_ret = &default_errcode_ret; if (!device->isA(Coal::Object::T_Device)) { *errcode_ret = CL_INVALID_DEVICE; return 0; } if (!context->isA(Coal::Object::T_Context)) { *errcode_ret = CL_INVALID_CONTEXT; return 0; } *errcode_ret = CL_SUCCESS; Coal::CommandQueue *queue = new Coal::CommandQueue( (Coal::Context *)context, (Coal::DeviceInterface *)device, properties, errcode_ret); if (*errcode_ret != CL_SUCCESS) { // Initialization failed, destroy context delete queue; return 0; } return (_cl_command_queue *)queue; }
// Command Queue APIs cl_command_queue clCreateCommandQueue(cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_int * errcode_ret) { #ifdef DBG_API std::cerr << "Entering clCreateCommandQueue\n"; #endif cl_int default_errcode_ret; // No errcode_ret ? if (!errcode_ret) errcode_ret = &default_errcode_ret; #ifdef DBG_API std::cerr << "Check if the device is an object\n"; #endif if (!device->isA(Coal::Object::T_Device)) { std::cerr << "INVALID_DEVICE\n"; *errcode_ret = CL_INVALID_DEVICE; return 0; } if (!context->isA(Coal::Object::T_Context)) { #ifdef DBG_OUTPUT std::cout << "!!! ERROR: INVALID_CONTEXT" << std::endl; #endif *errcode_ret = CL_INVALID_CONTEXT; return 0; } #ifdef DBG_API std::cerr << "Attempt to initialise device\n"; #endif if (!device->init()) { #ifdef DBG_OUTPUT std::cout << "!!!ERROR: Device initialisation failed!\n"; #endif *errcode_ret = CL_DEVICE_NOT_AVAILABLE; return 0; } *errcode_ret = CL_SUCCESS; Coal::CommandQueue *queue = new Coal::CommandQueue( (Coal::Context *)context, (Coal::DeviceInterface *)device, properties, errcode_ret); if (*errcode_ret != CL_SUCCESS) { #ifdef DBG_OUTPUT std::cout << "!!! ERROR: CommandQueue create failed" << std::endl; #endif // Initialization failed, destroy context delete queue; return 0; } return (_cl_command_queue *)queue; }
// Answers a device property query.
//
// Returns CL_INVALID_DEVICE for a null device and CL_INVALID_VALUE for an
// unrecognised `param`; every other path returns the result of the
// scalar_property / vector_property / string_property helper that handles
// `buf`, `size` and `size_ret`.
//
// Queries that yield the same constant are grouped under shared case labels.
PUBLIC cl_int
clGetDeviceInfo(cl_device_id dev, cl_device_info param,
                size_t size, void *buf, size_t *size_ret) {
   if (!dev)
      return CL_INVALID_DEVICE;

   switch (param) {
   /* ---- identification --------------------------------------------- */
   case CL_DEVICE_TYPE:
      return scalar_property<cl_device_type>(buf, size, size_ret,
                                             dev->type());

   case CL_DEVICE_VENDOR_ID:
      return scalar_property<cl_uint>(buf, size, size_ret,
                                      dev->vendor_id());

   case CL_DEVICE_PLATFORM:
      return scalar_property<cl_platform_id>(buf, size, size_ret,
                                             &dev->platform);

   case CL_DEVICE_NAME:
      return string_property(buf, size, size_ret, dev->device_name());

   case CL_DEVICE_VENDOR:
      return string_property(buf, size, size_ret, dev->vendor_name());

   case CL_DRIVER_VERSION:
      return string_property(buf, size, size_ret, PACKAGE_VERSION);

   case CL_DEVICE_PROFILE:
      return string_property(buf, size, size_ret, "FULL_PROFILE");

   case CL_DEVICE_VERSION:
      return string_property(buf, size, size_ret,
                             "OpenCL 1.1 MESA " PACKAGE_VERSION);

   case CL_DEVICE_OPENCL_C_VERSION:
      return string_property(buf, size, size_ret, "OpenCL C 1.1");

   case CL_DEVICE_EXTENSIONS:
      return string_property(buf, size, size_ret, "");

   /* ---- execution geometry ------------------------------------------ */
   case CL_DEVICE_MAX_COMPUTE_UNITS:
      return scalar_property<cl_uint>(buf, size, size_ret, 1);

   case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:
      return scalar_property<cl_uint>(buf, size, size_ret,
                                      dev->max_block_size().size());

   case CL_DEVICE_MAX_WORK_ITEM_SIZES:
      return vector_property<size_t>(buf, size, size_ret,
                                     dev->max_block_size());

   case CL_DEVICE_MAX_WORK_GROUP_SIZE:
      return scalar_property<size_t>(buf, size, size_ret,
                                     dev->max_threads_per_block());

   case CL_DEVICE_ADDRESS_BITS:
      return scalar_property<cl_uint>(buf, size, size_ret, 32);

   case CL_DEVICE_EXECUTION_CAPABILITIES:
      return scalar_property<cl_device_exec_capabilities>(buf, size, size_ret,
                                                          CL_EXEC_KERNEL);

   case CL_DEVICE_QUEUE_PROPERTIES:
      return scalar_property<cl_command_queue_properties>(
         buf, size, size_ret, CL_QUEUE_PROFILING_ENABLE);

   case CL_DEVICE_PROFILING_TIMER_RESOLUTION:
      return scalar_property<size_t>(buf, size, size_ret, 0);

   case CL_DEVICE_ENDIAN_LITTLE:
      return scalar_property<cl_bool>(buf, size, size_ret,
                                      dev->endianness() == PIPE_ENDIAN_LITTLE);

   /* ---- vector widths (preferred and native report the same) -------- */
   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR:
   case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR:
      return scalar_property<cl_uint>(buf, size, size_ret, 16);

   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT:
   case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT:
      return scalar_property<cl_uint>(buf, size, size_ret, 8);

   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT:
   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT:
   case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT:
   case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT:
      return scalar_property<cl_uint>(buf, size, size_ret, 4);

   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG:
   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
   case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG:
   case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE:
      return scalar_property<cl_uint>(buf, size, size_ret, 2);

   /* ---- cl_uint queries that report zero ----------------------------- */
   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
   case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF:
   case CL_DEVICE_MAX_CLOCK_FREQUENCY:
   case CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE:
      return scalar_property<cl_uint>(buf, size, size_ret, 0);

   /* ---- images and samplers ------------------------------------------ */
   case CL_DEVICE_MAX_READ_IMAGE_ARGS:
      return scalar_property<cl_uint>(buf, size, size_ret,
                                      dev->max_images_read());

   case CL_DEVICE_MAX_WRITE_IMAGE_ARGS:
      return scalar_property<cl_uint>(buf, size, size_ret,
                                      dev->max_images_write());

   case CL_DEVICE_IMAGE2D_MAX_WIDTH:
   case CL_DEVICE_IMAGE2D_MAX_HEIGHT:
      return scalar_property<size_t>(buf, size, size_ret,
                                     1 << dev->max_image_levels_2d());

   case CL_DEVICE_IMAGE3D_MAX_WIDTH:
   case CL_DEVICE_IMAGE3D_MAX_HEIGHT:
   case CL_DEVICE_IMAGE3D_MAX_DEPTH:
      return scalar_property<size_t>(buf, size, size_ret,
                                     1 << dev->max_image_levels_3d());

   case CL_DEVICE_MAX_SAMPLERS:
      return scalar_property<cl_uint>(buf, size, size_ret,
                                      dev->max_samplers());

   /* ---- boolean capabilities ----------------------------------------- */
   case CL_DEVICE_IMAGE_SUPPORT:
   case CL_DEVICE_AVAILABLE:
   case CL_DEVICE_COMPILER_AVAILABLE:
   case CL_DEVICE_HOST_UNIFIED_MEMORY:
      return scalar_property<cl_bool>(buf, size, size_ret, CL_TRUE);

   case CL_DEVICE_ERROR_CORRECTION_SUPPORT:
      return scalar_property<cl_bool>(buf, size, size_ret, CL_FALSE);

   /* ---- memory -------------------------------------------------------- */
   case CL_DEVICE_MAX_MEM_ALLOC_SIZE:
      return scalar_property<cl_ulong>(buf, size, size_ret,
                                       dev->max_mem_alloc_size());

   case CL_DEVICE_MAX_PARAMETER_SIZE:
      return scalar_property<size_t>(buf, size, size_ret,
                                     dev->max_mem_input());

   case CL_DEVICE_MEM_BASE_ADDR_ALIGN:
   case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE:
      return scalar_property<cl_uint>(buf, size, size_ret, 128);

   case CL_DEVICE_SINGLE_FP_CONFIG:
      return scalar_property<cl_device_fp_config>(
         buf, size, size_ret,
         CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST);

   case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
      return scalar_property<cl_device_mem_cache_type>(buf, size, size_ret,
                                                       CL_NONE);

   case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE:
      return scalar_property<cl_ulong>(buf, size, size_ret, 0);

   case CL_DEVICE_GLOBAL_MEM_SIZE:
      return scalar_property<cl_ulong>(buf, size, size_ret,
                                       dev->max_mem_global());

   case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:
      return scalar_property<cl_ulong>(buf, size, size_ret,
                                       dev->max_const_buffer_size());

   case CL_DEVICE_MAX_CONSTANT_ARGS:
      return scalar_property<cl_uint>(buf, size, size_ret,
                                      dev->max_const_buffers());

   case CL_DEVICE_LOCAL_MEM_TYPE:
      return scalar_property<cl_device_local_mem_type>(buf, size, size_ret,
                                                       CL_LOCAL);

   case CL_DEVICE_LOCAL_MEM_SIZE:
      return scalar_property<cl_ulong>(buf, size, size_ret,
                                       dev->max_mem_local());

   default:
      return CL_INVALID_VALUE;
   }
}
/**
 * Convert a host image to the device representation (4 cl_float channels per
 * pixel) and write the requested rectangle to the device buffer.
 *
 * The whole host image is first converted into a temporary width x height
 * float buffer, then device_id->write_rect copies the rectangle out of it.
 *
 * @param image             Image object; NULL yields CL_INVALID_MEM_OBJECT.
 * @param device_id         Device whose write_rect hook performs the copy.
 * @param origin_           [3] origin of the rectangle, x in pixels.
 * @param region_           [3] extent of the rectangle, x in pixels.
 * @param host_row_pitch    Currently not consulted; host rows are indexed as
 *                          tightly packed (width * host_channels elements).
 * @param host_slice_pitch  Currently not consulted.
 * @param ptr               Host pixel data.
 *
 * @return CL_SUCCESS, CL_INVALID_MEM_OBJECT, CL_INVALID_VALUE (NULL pointer
 *         argument or rectangle outside image->size) or
 *         CL_OUT_OF_HOST_MEMORY.
 *
 * Only CL_FLOAT and CL_UNORM_INT8 channel types are converted; any other
 * type hits POCL_ABORT_UNIMPLEMENTED. Only CL_RGBA and CL_R channel orders
 * are copied; pixels of any other order stay zero in the temporary buffer.
 */
extern cl_int
pocl_write_image (cl_mem image,
                  cl_device_id device_id,
                  const size_t * origin_, /*[3]*/
                  const size_t * region_, /*[3]*/
                  size_t host_row_pitch,
                  size_t host_slice_pitch,
                  const void * ptr)
{
  if (image == NULL)
    return CL_INVALID_MEM_OBJECT;

  /* origin_ is dereferenced below just like region_, so validate both. */
  if ((ptr == NULL) || (origin_ == NULL) || (region_ == NULL))
    return CL_INVALID_VALUE;

  int width = image->image_width;
  int height = image->image_height;
  cl_channel_order order = image->image_channel_order;
  cl_channel_type type = image->image_channel_data_type;

  size_t dev_elem_size = sizeof(cl_float);
  int dev_channels = 4;

  int host_elem_size;
  int host_channels;
  pocl_get_image_information (image, &host_channels, &host_elem_size);

  /* The x components are converted from pixels to bytes. */
  size_t origin[3] = { origin_[0]*dev_elem_size*dev_channels,
                       origin_[1], origin_[2] };
  size_t region[3] = { region_[0]*dev_elem_size*dev_channels,
                       region_[1], region_[2] };

  size_t image_row_pitch = width*dev_elem_size*dev_channels;
  size_t image_slice_pitch = 0;

  if ((region[0]*region[1]*region[2] > 0) &&
      (region[0]-1 +
       image_row_pitch * (region[1]-1) +
       image_slice_pitch * (region[2]-1) >= image->size))
    return CL_INVALID_VALUE;

  /* Size computed in size_t so large images do not overflow int. */
  cl_float* temp = (cl_float*) malloc( (size_t)width * (size_t)height *
                                       (size_t)dev_channels * dev_elem_size );

  if (temp == NULL)
    return CL_OUT_OF_HOST_MEMORY;

  int x, y, k;

  for (y=0; y<height; y++)
    for (x=0; x<width*dev_channels; x++)
      temp[x+y*width*dev_channels] = 0.f;

  for (y=0; y<height; y++)
    {
      for (x=0; x<width; x++)
        {
          /* Zero-initialised so channels the host image does not provide
             are never read uninitialised in the CL_RGBA copy below. */
          cl_float elem[4] = { 0.f, 0.f, 0.f, 0.f };

          for (k=0; k<host_channels; k++)
            {
              if (type == CL_FLOAT)
                elem[k] = ((float*)ptr)[k+(x+y*width)*host_channels];
              else if (type==CL_UNORM_INT8)
                {
                  cl_uchar foo = ((cl_uchar*)ptr)[k+(x+y*width)*host_channels];
                  elem[k] = (float)(foo) * (1.f/255.f);
                }
              else
                POCL_ABORT_UNIMPLEMENTED();
            }

          if (order == CL_RGBA)
            for (k=0; k<4; k++)
              temp[(x+y*width)*dev_channels+k] = elem[k];
          else if (order == CL_R)
            {
              /* Single-channel images expand to (r, 0, 0, 1). */
              temp[(x+y*width)*dev_channels+0] = elem[0];
              temp[(x+y*width)*dev_channels+1] = 0.f;
              temp[(x+y*width)*dev_channels+2] = 0.f;
              temp[(x+y*width)*dev_channels+3] = 1.f;
            }
        }
    }

  /* The temporary buffer has the device layout, so the same origin and
     pitches describe both sides of the copy. */
  device_id->write_rect(device_id->data, temp,
                        image->device_ptrs[device_id->dev_id],
                        origin, origin, region,
                        image_row_pitch, image_slice_pitch,
                        image_row_pitch, image_slice_pitch);

  free (temp);
  return CL_SUCCESS;
}
/**
 * Read a rectangle of a device image (stored as 4 cl_float channels per
 * pixel) and convert it to the host channel layout.
 *
 * device_id->read_rect fills a temporary width x height float buffer, after
 * which every pixel of that buffer is converted into ptr.
 *
 * @param image             Image object; NULL yields CL_INVALID_MEM_OBJECT.
 * @param device_id         Device whose read_rect hook performs the copy.
 * @param origin_           [3] origin of the rectangle, x in pixels.
 * @param region_           [3] extent of the rectangle, x in pixels.
 * @param host_row_pitch    Row stride used to index ptr, in elements of the
 *                          host channel type; 0 means width * host_channels.
 * @param host_slice_pitch  Currently not consulted.
 * @param ptr               Destination host buffer.
 *
 * @return CL_SUCCESS, CL_INVALID_MEM_OBJECT, CL_INVALID_VALUE (NULL pointer
 *         argument or rectangle outside image->size) or
 *         CL_OUT_OF_HOST_MEMORY.
 *
 * Only CL_UNORM_INT8 output is implemented. The CL_FLOAT path and all other
 * channel types hit POCL_ABORT_UNIMPLEMENTED as soon as a pixel is converted.
 */
extern cl_int
pocl_read_image (cl_mem image,
                 cl_device_id device_id,
                 const size_t * origin_, /*[3]*/
                 const size_t * region_, /*[3]*/
                 size_t host_row_pitch,
                 size_t host_slice_pitch,
                 void * ptr)
{
  if (image == NULL)
    return CL_INVALID_MEM_OBJECT;

  /* origin_ is dereferenced below just like region_, so validate both. */
  if ((ptr == NULL) || (origin_ == NULL) || (region_ == NULL))
    return CL_INVALID_VALUE;

  int width = image->image_width;
  int height = image->image_height;
  int dev_elem_size = sizeof(cl_float);
  int dev_channels = 4;

  /* The x components are converted from pixels to bytes. */
  size_t origin[3] = { origin_[0]*dev_elem_size*dev_channels,
                       origin_[1], origin_[2] };
  size_t region[3] = { region_[0]*dev_elem_size*dev_channels,
                       region_[1], region_[2] };

  size_t image_row_pitch = width*dev_elem_size*dev_channels;
  size_t image_slice_pitch = 0;

  if ((region[0]*region[1]*region[2] > 0) &&
      (region[0]-1 +
       image_row_pitch * (region[1]-1) +
       image_slice_pitch * (region[2]-1) >= image->size))
    return CL_INVALID_VALUE;

  int i, j, k;

  cl_channel_order order = image->image_channel_order;
  cl_channel_type type = image->image_channel_data_type;

  /* Size computed in size_t so large images do not overflow int. */
  cl_float* temp = (cl_float*) malloc( (size_t)width * (size_t)height *
                                       (size_t)dev_channels *
                                       (size_t)dev_elem_size );

  if (temp == NULL)
    return CL_OUT_OF_HOST_MEMORY;

  int host_channels, host_elem_size;

  pocl_get_image_information(image, &host_channels, &host_elem_size);

  if (host_row_pitch == 0)
    {
      host_row_pitch = width*host_channels;
    }

  /* The temporary buffer has the device layout, so the same origin and
     pitches describe both sides of the copy. */
  device_id->read_rect(device_id->data, temp,
                       image->device_ptrs[device_id->dev_id],
                       origin, origin, region,
                       image_row_pitch, image_slice_pitch,
                       image_row_pitch, image_slice_pitch);

  for (j=0; j<height; j++)
    {
      for (i=0; i<width; i++)
        {
          cl_float elem[4] = { 0, 0, 0, 0 };

          if (order == CL_RGBA)
            {
              for (k=0; k<4; k++)
                elem[k] = temp[i*dev_channels + j*width*dev_channels + k];
            }
          else if (order == CL_R)
            {
              // host_channels == 1
              elem[0] = temp[i*dev_channels + j*width*dev_channels + 0];
            }

          if (type == CL_UNORM_INT8)
            {
              // host_channels == 4
              for (k=0; k<host_channels; k++)
                {
                  ((cl_uchar*)ptr)[i*host_channels + j*host_row_pitch + k]
                    = (unsigned char)(255*elem[k]);
                }
            }
          else if (type == CL_FLOAT)
            {
              for (k=0; k<host_channels; k++)
                {
                  /* Float readback is deliberately marked unimplemented;
                     the store below is kept as the intended behaviour. */
                  POCL_ABORT_UNIMPLEMENTED();
                  ((cl_float*)ptr)[i*host_channels + j*host_row_pitch + k]
                    = elem[k];
                }
            }
          else
            POCL_ABORT_UNIMPLEMENTED();
        }
    }

  free (temp);

  return CL_SUCCESS;
}
// Forwards the buffer import request to the device, supplying this
// context's m_context, and returns the device's status unchanged.
YamiStatus OclContext::createBufferFromFdIntel(const cl_import_buffer_info_intel* info, cl_mem* mem)
{
    const YamiStatus status = m_device->createBufferFromFdIntel(m_context, info, mem);
    return status;
}
// Hands kernelMap to the device's releaseKernel and reports its result.
bool OclContext::releaseKernel(OclKernelMap& kernelMap)
{
    const bool released = m_device->releaseKernel(kernelMap);
    return released;
}
// Asks the device to create the kernels for program `name` in this
// context's m_context, filling kernelMap; reports the device's result.
bool OclContext::createKernel(const char* name, OclKernelMap& kernelMap)
{
    const bool created = m_device->createKernel(m_context, name, kernelMap);
    return created;
}