cl_mem OpenCLDevice::COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, ReadBufferOperation *reader) { cl_int error; MemoryBuffer *result = reader->getInputMemoryBuffer(inputMemoryBuffers); const cl_image_format imageFormat = { CL_RGBA, CL_FLOAT }; cl_mem clBuffer = clCreateImage2D(this->m_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &imageFormat, result->getWidth(), result->getHeight(), 0, result->getBuffer(), &error); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } if (error == CL_SUCCESS) cleanup->push_back(clBuffer); error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clBuffer); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, offsetIndex, result); return clBuffer; }
void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex, NodeOperation *operation) { cl_int error; const int width = outputMemoryBuffer->getWidth(); const int height = outputMemoryBuffer->getHeight(); int offsetx; int offsety; int localSize = 1024; size_t size[2]; cl_int2 offset; if (this->m_vendorID == NVIDIA) { localSize = 32; } bool breaked = false; for (offsety = 0; offsety < height && (!breaked); offsety += localSize) { offset.s[1] = offsety; if (offsety + localSize < height) { size[1] = localSize; } else { size[1] = height - offsety; } for (offsetx = 0; offsetx < width && (!breaked); offsetx += localSize) { if (offsetx + localSize < width) { size[0] = localSize; } else { size[0] = width - offsetx; } offset.s[0] = offsetx; error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } error = clEnqueueNDRangeKernel(this->m_queue, kernel, 2, NULL, size, 0, 0, 0, NULL); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } clFlush(this->m_queue); if (operation->isBreaked()) { breaked = false; } } } }
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) { device_vector<uint64_t> size_buffer; size_buffer.resize(1); device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE); uint threads = num_threads; device->kernel_set_args(device->program_state_buffer_size(), 0, kg, data, threads, size_buffer); size_t global_size = 64; device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, device->program_state_buffer_size(), 1, NULL, &global_size, NULL, 0, NULL, NULL); device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel"); device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t)); device->mem_free(size_buffer); if(device->ciErr != CL_SUCCESS) { string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", clewErrorString(device->ciErr)); device->opencl_error(message); return 0; } return *size_buffer.get_data(); }
cOpenClHardware::cOpenClHardware(QObject *parent) : QObject(parent) { openClAvailable = false; contextReady = false; // TODO: confirm initial value // initialize multi-gpu devices' indices list with empty QList selectedDevicesIndices = QList<int>(); missingOpenClDLL = false; selectedPlatformIndex = 0; #ifdef USE_OPENCL #ifdef _WIN32 #ifndef _MSC_VER const std::wstring openclDll(L"OpenCL.dll"); int err = clewInit(openclDll.c_str()); if (err) { qCritical() << clewErrorString(err); missingOpenClDLL = true; } #endif // _MSC_VER #endif isNVidia = false; isAMD = false; context = nullptr; #endif }
virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) { device->kernel_set_args(program(), 0, kg, data); device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, program(), 2, NULL, dim.global_size, dim.local_size, 0, NULL, NULL); device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel"); if(device->ciErr != CL_SUCCESS) { string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", clewErrorString(device->ciErr)); device->opencl_error(message); return false; } return true; }
void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer) { cl_int error; const size_t size[] = {(size_t)outputMemoryBuffer->getWidth(), (size_t)outputMemoryBuffer->getHeight()}; error = clEnqueueNDRangeKernel(this->m_queue, kernel, 2, NULL, size, 0, 0, 0, NULL); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } }
static inline cl_int _clCheck(cl_int errcode, const char *file, int line, const char *func) { mocl_errcode = errcode; if (mocl_errcode != CL_SUCCESS) { error ("%d/%s at %s:%d %s\n", mocl_errcode, clewErrorString(mocl_errcode), file, line, func); } return errcode; }
void OpenCLDevice::COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex, NodeOperation *operation) { if (offsetIndex != -1) { cl_int error; cl_int2 offset = {{(cl_int)operation->getWidth(), (cl_int)operation->getHeight()}}; error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } } }
void OpenCLDevice::COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer) { cl_int error; error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clOutputMemoryBuffer); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } }
cl_kernel OpenCLDevice::COM_clCreateKernel(const char *kernelname, list<cl_kernel> *clKernelsToCleanUp) { cl_int error; cl_kernel kernel = clCreateKernel(this->m_program, kernelname, &error); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } else { if (clKernelsToCleanUp) clKernelsToCleanUp->push_back(kernel); } return kernel; }
void OpenCLDevice::COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffer) { if (offsetIndex != -1) { cl_int error; rcti *rect = memoryBuffer->getRect(); cl_int2 offset = {{rect->xmin, rect->ymin}}; error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } } }
virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) { if(cached_id != cached_memory.id) { cl_uint start_arg_index = device->kernel_set_args(program(), 0, kg, data, *cached_memory.split_data, *cached_memory.ray_state); device->set_kernel_arg_buffers(program(), &start_arg_index); start_arg_index += device->kernel_set_args(program(), start_arg_index, *cached_memory.queue_index, *cached_memory.use_queues_flag, *cached_memory.work_pools, *cached_memory.buffer); cached_id = cached_memory.id; } device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, program(), 2, NULL, dim.global_size, dim.local_size, 0, NULL, NULL); device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel"); if(device->ciErr != CL_SUCCESS) { string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", clewErrorString(device->ciErr)); device->opencl_error(message); return false; } return true; }
const char *clGetErrorString (cl_int error) { return clewErrorString (error); }
void WorkScheduler::initialize(bool use_opencl, int num_cpu_threads) { /* initialize highlighting */ if (!g_highlightInitialized) { if (g_highlightedNodesRead) MEM_freeN(g_highlightedNodesRead); if (g_highlightedNodes) MEM_freeN(g_highlightedNodes); g_highlightedNodesRead = NULL; g_highlightedNodes = NULL; COM_startReadHighlights(); g_highlightInitialized = true; } #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE /* deinitialize if number of threads doesn't match */ if (g_cpudevices.size() != num_cpu_threads) { Device *device; while (g_cpudevices.size() > 0) { device = g_cpudevices.back(); g_cpudevices.pop_back(); device->deinitialize(); delete device; } g_cpuInitialized = false; } /* initialize CPU threads */ if (!g_cpuInitialized) { for (int index = 0; index < num_cpu_threads; index++) { CPUDevice *device = new CPUDevice(); device->initialize(); g_cpudevices.push_back(device); } g_cpuInitialized = true; } #ifdef COM_OPENCL_ENABLED /* deinitialize OpenCL GPU's */ if (use_opencl && !g_openclInitialized) { g_context = NULL; g_program = NULL; if (!OCL_init()) /* this will check for errors and skip if already initialized */ return; if (clCreateContextFromType) { cl_uint numberOfPlatforms = 0; cl_int error; error = clGetPlatformIDs(0, 0, &numberOfPlatforms); if (error == -1001) { } /* GPU not supported */ else if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } if (G.f & G_DEBUG) printf("%u number of platforms\n", numberOfPlatforms); cl_platform_id *platforms = (cl_platform_id *)MEM_mallocN(sizeof(cl_platform_id) * numberOfPlatforms, __func__); error = clGetPlatformIDs(numberOfPlatforms, platforms, 0); unsigned int indexPlatform; for (indexPlatform = 0; indexPlatform < numberOfPlatforms; indexPlatform++) { cl_platform_id platform = platforms[indexPlatform]; cl_uint numberOfDevices = 0; clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, 0, &numberOfDevices); if (numberOfDevices <= 0) continue; cl_device_id *cldevices = (cl_device_id *)MEM_mallocN(sizeof(cl_device_id) * numberOfDevices, __func__); clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numberOfDevices, cldevices, 0); g_context = clCreateContext(NULL, numberOfDevices, cldevices, clContextError, NULL, &error); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } const char *cl_str[2] = {datatoc_COM_OpenCLKernels_cl, NULL}; g_program = clCreateProgramWithSource(g_context, 1, cl_str, 0, &error); error = clBuildProgram(g_program, numberOfDevices, cldevices, 0, 0, 0); if (error != CL_SUCCESS) { cl_int error2; size_t ret_val_size = 0; printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); error2 = clGetProgramBuildInfo(g_program, cldevices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); if (error2 != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } char *build_log = (char *)MEM_mallocN(sizeof(char) * ret_val_size + 1, __func__); error2 = clGetProgramBuildInfo(g_program, cldevices[0], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL); if (error2 != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } build_log[ret_val_size] = '\0'; printf("%s", build_log); MEM_freeN(build_log); } else { unsigned int indexDevices; for (indexDevices = 0; indexDevices < numberOfDevices; indexDevices++) { cl_device_id device = cldevices[indexDevices]; cl_int vendorID = 0; cl_int error2 = clGetDeviceInfo(device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, NULL); if (error2 != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error2, clewErrorString(error2)); } OpenCLDevice *clDevice = new OpenCLDevice(g_context, device, g_program, vendorID); clDevice->initialize(); g_gpudevices.push_back(clDevice); } } MEM_freeN(cldevices); } MEM_freeN(platforms); } g_openclInitialized = true; } #endif #endif }
void WriteBufferOperation::executeOpenCLRegion(OpenCLDevice *device, rcti *rect, unsigned int chunkNumber, MemoryBuffer **inputMemoryBuffers, MemoryBuffer *outputBuffer) { float *outputFloatBuffer = outputBuffer->getBuffer(); cl_int error; /* * 1. create cl_mem from outputbuffer * 2. call NodeOperation (input) executeOpenCLChunk(.....) * 3. schedule readback from opencl to main device (outputbuffer) * 4. schedule native callback * * note: list of cl_mem will be filled by 2, and needs to be cleaned up by 4 */ // STEP 1 const unsigned int outputBufferWidth = outputBuffer->getWidth(); const unsigned int outputBufferHeight = outputBuffer->getHeight(); const cl_image_format imageFormat = { CL_RGBA, CL_FLOAT }; cl_mem clOutputBuffer = clCreateImage2D(device->getContext(), CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, outputBufferWidth, outputBufferHeight, 0, outputFloatBuffer, &error); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } // STEP 2 list<cl_mem> *clMemToCleanUp = new list<cl_mem>(); clMemToCleanUp->push_back(clOutputBuffer); list<cl_kernel> *clKernelsToCleanUp = new list<cl_kernel>(); this->m_input->executeOpenCL(device, outputBuffer, clOutputBuffer, inputMemoryBuffers, clMemToCleanUp, clKernelsToCleanUp); // STEP 3 size_t origin[3] = {0, 0, 0}; size_t region[3] = {outputBufferWidth, outputBufferHeight, 1}; // clFlush(queue); // clFinish(queue); error = clEnqueueBarrier(device->getQueue()); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } error = clEnqueueReadImage(device->getQueue(), clOutputBuffer, CL_TRUE, origin, region, 0, 0, outputFloatBuffer, 0, NULL, NULL); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } this->getMemoryProxy()->getBuffer()->copyContentFrom(outputBuffer); // STEP 4 while (!clMemToCleanUp->empty()) { cl_mem mem = clMemToCleanUp->front(); error = clReleaseMemObject(mem); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } clMemToCleanUp->pop_front(); } while (!clKernelsToCleanUp->empty()) { cl_kernel kernel = clKernelsToCleanUp->front(); error = clReleaseKernel(kernel); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } clKernelsToCleanUp->pop_front(); } delete clKernelsToCleanUp; }
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim, RenderTile& rtile, int num_global_elements, device_memory& kernel_globals, device_memory& kernel_data, device_memory& split_data, device_memory& ray_state, device_memory& queue_index, device_memory& use_queues_flag, device_memory& work_pool_wgs ) { cl_int dQueue_size = dim.global_size[0] * dim.global_size[1]; /* Set the range of samples to be processed for every ray in * path-regeneration logic. */ cl_int start_sample = rtile.start_sample; cl_int end_sample = rtile.start_sample + rtile.num_samples; cl_uint start_arg_index = device->kernel_set_args(device->program_data_init(), 0, kernel_globals, kernel_data, split_data, num_global_elements, ray_state, rtile.rng_state); /* TODO(sergey): Avoid map lookup here. */ #define KERNEL_TEX(type, ttype, name) \ device->set_kernel_arg_mem(device->program_data_init(), &start_arg_index, #name); #include "kernel/kernel_textures.h" #undef KERNEL_TEX start_arg_index += device->kernel_set_args(device->program_data_init(), start_arg_index, start_sample, end_sample, rtile.x, rtile.y, rtile.w, rtile.h, rtile.offset, rtile.stride, queue_index, dQueue_size, use_queues_flag, work_pool_wgs, rtile.num_samples, rtile.buffer); /* Enqueue ckPathTraceKernel_data_init kernel. */ device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, device->program_data_init(), 2, NULL, dim.global_size, dim.local_size, 0, NULL, NULL); device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel"); if(device->ciErr != CL_SUCCESS) { string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", clewErrorString(device->ciErr)); device->opencl_error(message); return false; } return true; }
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim, RenderTile& rtile, int num_global_elements, device_memory& kernel_globals, device_memory& kernel_data, device_memory& split_data, device_memory& ray_state, device_memory& queue_index, device_memory& use_queues_flag, device_memory& work_pool_wgs ) { cl_int dQueue_size = dim.global_size[0] * dim.global_size[1]; /* Set the range of samples to be processed for every ray in * path-regeneration logic. */ cl_int start_sample = rtile.start_sample; cl_int end_sample = rtile.start_sample + rtile.num_samples; cl_uint start_arg_index = device->kernel_set_args(device->program_data_init(), 0, kernel_globals, kernel_data, split_data, num_global_elements, ray_state); device->set_kernel_arg_buffers(device->program_data_init(), &start_arg_index); start_arg_index += device->kernel_set_args(device->program_data_init(), start_arg_index, start_sample, end_sample, rtile.x, rtile.y, rtile.w, rtile.h, rtile.offset, rtile.stride, queue_index, dQueue_size, use_queues_flag, work_pool_wgs, rtile.num_samples, rtile.buffer); /* Enqueue ckPathTraceKernel_data_init kernel. */ device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, device->program_data_init(), 2, NULL, dim.global_size, dim.local_size, 0, NULL, NULL); device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel"); if(device->ciErr != CL_SUCCESS) { string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", clewErrorString(device->ciErr)); device->opencl_error(message); return false; } cached_memory.split_data = &split_data; cached_memory.ray_state = &ray_state; cached_memory.queue_index = &queue_index; cached_memory.use_queues_flag = &use_queues_flag; cached_memory.work_pools = &work_pool_wgs; cached_memory.buffer = &rtile.buffer; cached_memory.id++; return true; }