Ensure(eclGetContextInteractively, commandQueueConsistentWithDevice) { size_t size; cl_device_id qDevice; err = eclGetContextInteractively(&ctx); assert_that(err, is_equal_to(CL_SUCCESS)); clGetCommandQueueInfo(ctx.queue, CL_QUEUE_DEVICE, 0, 0, &size); assert_that(err, is_equal_to(CL_SUCCESS)); assert_that(size, is_equal_to(sizeof(qDevice))); clGetCommandQueueInfo(ctx.queue, CL_QUEUE_DEVICE, size, &qDevice, 0); assert_that(qDevice, is_equal_to(ctx.device)); }
Ensure(eclGetContextInteractively, commandQueueConsistentWithContext) { size_t size; cl_context qCtx; err = eclGetContextInteractively(&ctx); assert_that(err, is_equal_to(CL_SUCCESS)); clGetCommandQueueInfo(ctx.queue, CL_QUEUE_CONTEXT, 0, 0, &size); assert_that(err, is_equal_to(CL_SUCCESS)); assert_that(size, is_equal_to(sizeof(qCtx))); clGetCommandQueueInfo(ctx.queue, CL_QUEUE_CONTEXT, size, &qCtx, 0); assert_that(qCtx, is_equal_to(ctx.context)); }
void RunOpenCLKernel(void* p_CmdQ, int p_Width, int p_Height, const float* p_Value, const float* p_Input, float* p_Output) { cl_int error; cl_command_queue cmdQ = static_cast<cl_command_queue>(p_CmdQ); static cl_context clContext = NULL; if (clContext == NULL) { error = clGetCommandQueueInfo(cmdQ, CL_QUEUE_CONTEXT, sizeof(cl_context), &clContext, NULL); CheckError(error, "Unable to get the context"); } static cl_device_id deviceId = NULL; if (deviceId == NULL) { error = clGetCommandQueueInfo(cmdQ, CL_QUEUE_DEVICE, sizeof(cl_device_id), &deviceId, NULL); CheckError(error, "Unable to get the device"); } static cl_kernel kernel = NULL; if (kernel == NULL) { cl_program program = clCreateProgramWithSource(clContext, 1, (const char **)&KernelSource, NULL, &error); CheckError(error, "Unable to create program"); error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); CheckError(error, "Unable to build program"); kernel = clCreateKernel(program, "MultiplyKernel", &error); CheckError(error, "Unable to create kernel"); } int count = 0; error = clSetKernelArg(kernel, count++, sizeof(int), &p_Width); error |= clSetKernelArg(kernel, count++, sizeof(int), &p_Height); error |= clSetKernelArg(kernel, count++, sizeof(float), &p_Value[0]); error |= clSetKernelArg(kernel, count++, sizeof(float), &p_Value[1]); error |= clSetKernelArg(kernel, count++, sizeof(float), &p_Value[2]); error |= clSetKernelArg(kernel, count++, sizeof(float), &p_Value[3]); error |= clSetKernelArg(kernel, count++, sizeof(cl_mem), &p_Input); error |= clSetKernelArg(kernel, count++, sizeof(cl_mem), &p_Output); CheckError(error, "Unable to set kernel arguments"); size_t localWorkSize[2], globalWorkSize[2]; clGetKernelWorkGroupInfo(kernel, deviceId, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), localWorkSize, NULL); localWorkSize[1] = 1; globalWorkSize[0] = ((p_Width + localWorkSize[0] - 1) / localWorkSize[0]) * localWorkSize[0]; globalWorkSize[1] = p_Height; clEnqueueNDRangeKernel(cmdQ, kernel, 2, NULL, 
globalWorkSize, localWorkSize, 0, NULL, NULL); }
/** Check that a memory object and a list of events are compatible with a
 * command queue.
 * @param command_queue Command queue to test against.
 * @param mem Memory object whose context is compared with the queue context.
 * @param num_events_in_wait_list Number of entries in event_wait_list.
 * @param event_wait_list Events whose context (and command queue, when set)
 * are compared with the queue.
 * @return CL_SUCCESS if every object matches. CL_INVALID_CONTEXT if the
 * memory object or an event belongs to another context, or if an event is
 * bound to a different command queue. Errors reported by the info queries
 * are returned unchanged.
 */
cl_int testCommandQueue(cl_command_queue     command_queue,
                        cl_mem               mem,
                        cl_uint              num_events_in_wait_list,
                        const ocland_event * event_wait_list)
{
    cl_int status;
    cl_context queue_context, mem_context;
    unsigned int idx;

    status = clGetCommandQueueInfo(command_queue,
                                   CL_QUEUE_CONTEXT,
                                   sizeof(cl_context),
                                   &queue_context,
                                   NULL);
    if(status != CL_SUCCESS){
        return status;
    }

    status = clGetMemObjectInfo(mem,
                                CL_MEM_CONTEXT,
                                sizeof(cl_context),
                                &mem_context,
                                NULL);
    if(status != CL_SUCCESS){
        return status;
    }

    if(queue_context != mem_context){
        return CL_INVALID_CONTEXT;
    }

    for(idx = 0; idx < num_events_in_wait_list; idx++){
        if(event_wait_list[idx]->context != queue_context){
            return CL_INVALID_CONTEXT;
        }
        /* The event's command queue can be NULL; only a set queue is compared */
        if(event_wait_list[idx]->command_queue
           && (event_wait_list[idx]->command_queue != command_queue)){
            return CL_INVALID_CONTEXT;
        }
    }

    return CL_SUCCESS;
}
/**
 * Uploads a host image to the device and runs the transform on it.
 *
 * Creates a source buffer initialised from `in` and a destination buffer of
 * the same size in the context of `queue`, marks them as owned by this
 * object, and forwards to the buffer-based run() overload.
 *
 * @param in     Host data of sizeX * sizeY elements; must not be null.
 * @param sizeX  Width in elements.
 * @param sizeY  Height in elements.
 * @param levels Forwarded unchanged to the buffer-based overload.
 * @return CL_INVALID_VALUE if `in` is null, the OpenCL error code of the
 *         failing call if the context query or a buffer allocation fails,
 *         CL_SUCCESS otherwise.
 */
template <typename T>
cl_int DWTKernel<T>::run(T* in, int sizeX, int sizeY, int levels){
    if (!in)
        return CL_INVALID_VALUE;

    cl_int error_code;
    cl_context context = NULL;

    // Obtain the OpenCL context from the command-queue properties
    error_code = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL);
    if (CL_SUCCESS != error_code)
    {
        LogError("Error: clGetCommandQueueInfo (CL_QUEUE_CONTEXT) returned %s.\n", TranslateOpenCLError(error_code));
        return error_code;
    }

    // Compute the byte count in size_t so that sizeX * sizeY cannot overflow int.
    const size_t bufferBytes = static_cast<size_t>(sizeX) * sizeY * sizeof(T);

    // allocate memory on device
    srcMem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, bufferBytes, in, &error_code);
    if (CL_SUCCESS != error_code)
    {
        LogError("Error: clCreateBuffer (in) returned %s.\n", TranslateOpenCLError(error_code));
        return error_code;
    }

    dstMem = clCreateBuffer(context, CL_MEM_READ_WRITE, bufferBytes, NULL, &error_code);
    if (CL_SUCCESS != error_code)
    {
        LogError("Error: clCreateBuffer (out) returned %s.\n", TranslateOpenCLError(error_code));
        // ownsMemory has not been set yet, so release the source buffer here
        // instead of leaking it.
        clReleaseMemObject(srcMem);
        srcMem = NULL;
        return error_code;
    }

    ownsMemory = true;

    // NOTE(review): the result of the buffer-based overload is not propagated,
    // as in the original code - confirm whether it reports errors.
    run(srcMem, dstMem, sizeX, sizeY, levels);
    return CL_SUCCESS;
}
/* readonly attribute AString extensions; */
/*
 * Returns the CL_DEVICE_EXTENSIONS string of the device behind cmdQueue.
 * Yields NS_ERROR_NOT_AVAILABLE when any OpenCL query or the temporary
 * allocation fails, NS_OK otherwise.
 */
NS_IMETHODIMP dpoCContext::GetExtensions(nsAString & aExtensions)
{
	char *rString = NULL;
	cl_device_id device;
	size_t length;
	cl_int err;

	err = clGetCommandQueueInfo(cmdQueue, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device, NULL);
	if (err != CL_SUCCESS) {
		return NS_ERROR_NOT_AVAILABLE;
	}

	/* first call: ask for the size of the extension string */
	err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, NULL, &length);
	if (err != CL_SUCCESS) {
		return NS_ERROR_NOT_AVAILABLE;
	}

	rString = (char *) nsMemory::Alloc(sizeof(char)*(length+1));
	if (rString == NULL) {
		return NS_ERROR_NOT_AVAILABLE;
	}

	/* second call: fetch the string; do not use the buffer if this fails */
	err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, length, rString, NULL);
	if (err != CL_SUCCESS) {
		nsMemory::Free(rString);
		return NS_ERROR_NOT_AVAILABLE;
	}

	/* the buffer has one spare byte; make termination explicit */
	rString[length] = '\0';

	aExtensions.AssignLiteral(rString);
	nsMemory::Free(rString);

	return NS_OK;
}
/* Traces the arguments and forwards the call unchanged to the host
 * clGetCommandQueueInfo, returning its result. */
cl_int WINAPI wine_clGetCommandQueueInfo(cl_command_queue command_queue, cl_command_queue_info param_name,
                                         SIZE_T param_value_size, void * param_value, size_t * param_value_size_ret)
{
    TRACE("%p, %d, %ld, %p, %p\n", command_queue, param_name, param_value_size, param_value, param_value_size_ret);
    return clGetCommandQueueInfo(command_queue, param_name, param_value_size, param_value, param_value_size_ret);
}
/*! \brief Returns the number of reference counts. * */ size_t ocl::Queue::reference_count() const { if(_id == nullptr) throw std::runtime_error("could not performance a reference count"); cl_uint info; OPENCL_SAFE_CALL( clGetCommandQueueInfo (_id, CL_QUEUE_REFERENCE_COUNT, sizeof(info), &info, NULL)) ; return size_t(info); }
/*
 * When eclGetContextInteractively yields a non-null queue, OpenCL must accept
 * that handle for a CL_QUEUE_PROPERTIES size query.
 */
Ensure(eclGetContextInteractively, returnsAValidCommandQueue)
{
    eclGetContextInteractively(&ctx);

    if (ctx.queue) {
        size_t propertiesSize;

        /* Only the status matters here; the reported size is not inspected. */
        err = clGetCommandQueueInfo(ctx.queue, CL_QUEUE_PROPERTIES, 0, 0, &propertiesSize);
        assert_that(err, is_equal_to(CL_SUCCESS));
    }
}
/* Looks up the device a command queue is bound to and prints its
 * information via bfam_cl_print_device_info. */
void bfam_cl_print_device_info_from_queue(cl_command_queue queue)
{
  cl_device_id queue_device;

  BFAM_CL_SAFE_CALL(clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE,
                                          sizeof(cl_device_id),
                                          &queue_device, NULL));

  bfam_cl_print_device_info(queue_device);
}
/*
 * Returns the context a command queue belongs to.
 * The query status is handed to oclCheckErr; if that call returns after a
 * failure, NULL is returned instead of an uninitialized handle.
 */
cl_context
oclGetContextOfCQueue(cl_command_queue q)
{
	cl_context res = NULL;
	cl_int err = 0;

	err = clGetCommandQueueInfo(q, CL_QUEUE_CONTEXT, sizeof(cl_context), &res, NULL);
	oclCheckErr(err, "clGetCommandQueueInfo qCtx");
	if (err != CL_SUCCESS)
		return NULL;
	return res;
}
/*
 * Returns the device a command queue is bound to.
 * The query status is handed to oclCheckErr; if that call returns after a
 * failure, NULL is returned instead of an uninitialized handle.
 */
cl_device_id
oclGetDeviceOfCQueue(cl_command_queue q)
{
	cl_device_id res = NULL;
	cl_int err = 0;

	err = clGetCommandQueueInfo(q, CL_QUEUE_DEVICE, sizeof(cl_device_id), &res, NULL);
	oclCheckErr(err, "clGetCommandQueueInfo qDev");
	if (err != CL_SUCCESS)
		return NULL;
	return res;
}
/**
 * Loads and builds Scan.cl, creates the three scan kernels, verifies that the
 * queue's device supports work-groups of at least WORKGROUP_SIZE for each of
 * them, and allocates the internal scratch buffer d_Buffer.
 *
 * @param cxGPUContext      Context used for the program and the buffer.
 * @param cqParamCommandQue Queue whose device is used for the work-group check.
 * @param argv              argv[0] is passed to shrFindFilePath to locate Scan.cl.
 *
 * Terminates the process with a non-zero status when the device cannot run
 * the required work-group size.
 */
extern "C" void initScan(cl_context cxGPUContext, cl_command_queue cqParamCommandQue, const char **argv)
{
    cl_int ciErrNum;
    size_t kernelLength;

    shrLog(" ...loading Scan.cl\n");
    char *cScan = oclLoadProgSource(shrFindFilePath("Scan.cl", argv[0]), "// My comment\n", &kernelLength);
    oclCheckError(cScan != NULL, shrTRUE);

    shrLog(" ...creating scan program\n");
    cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&cScan, &kernelLength, &ciErrNum);
    oclCheckError(ciErrNum, CL_SUCCESS);

    shrLog(" ...building scan program\n");
    ciErrNum = clBuildProgram(cpProgram, 0, NULL, compileOptions, NULL, NULL);
    if (ciErrNum != CL_SUCCESS)
    {
        // write out standard error, Build Log and PTX, then cleanup and exit
        shrLogEx(LOGBOTH | ERRORMSG, ciErrNum, STDERROR);
        oclLogBuildInfo(cpProgram, oclGetFirstDev(cxGPUContext));
        oclLogPtx(cpProgram, oclGetFirstDev(cxGPUContext), "oclScan.ptx");
        oclCheckError(ciErrNum, CL_SUCCESS);
    }

    shrLog(" ...creating scan kernels\n");
    ckScanExclusiveLocal1 = clCreateKernel(cpProgram, "scanExclusiveLocal1", &ciErrNum);
    oclCheckError(ciErrNum, CL_SUCCESS);
    ckScanExclusiveLocal2 = clCreateKernel(cpProgram, "scanExclusiveLocal2", &ciErrNum);
    oclCheckError(ciErrNum, CL_SUCCESS);
    ckUniformUpdate = clCreateKernel(cpProgram, "uniformUpdate", &ciErrNum);
    oclCheckError(ciErrNum, CL_SUCCESS);

    shrLog( " ...checking minimum supported workgroup size\n");
    //Check for work group size
    cl_device_id device;
    size_t szScanExclusiveLocal1, szScanExclusiveLocal2, szUniformUpdate;

    // The statuses are OR-ed together: only "all succeeded" vs "something
    // failed" is checked afterwards.
    ciErrNum  = clGetCommandQueueInfo(cqParamCommandQue, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device, NULL);
    ciErrNum |= clGetKernelWorkGroupInfo(ckScanExclusiveLocal1, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &szScanExclusiveLocal1, NULL);
    ciErrNum |= clGetKernelWorkGroupInfo(ckScanExclusiveLocal2, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &szScanExclusiveLocal2, NULL);
    ciErrNum |= clGetKernelWorkGroupInfo(ckUniformUpdate, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &szUniformUpdate, NULL);
    oclCheckError(ciErrNum, CL_SUCCESS);

    if( (szScanExclusiveLocal1 < WORKGROUP_SIZE) || (szScanExclusiveLocal2 < WORKGROUP_SIZE) || (szUniformUpdate < WORKGROUP_SIZE) )
    {
        shrLog("ERROR: Minimum work-group size %u required by this application is not supported on this device.\n", WORKGROUP_SIZE);
        // This is an error path: report failure to the caller of the process
        // rather than a success status.
        exit(1);
    }

    shrLog(" ...allocating internal buffers\n");
    d_Buffer = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE, (MAX_BATCH_ELEMENTS / (4 * WORKGROUP_SIZE)) * sizeof(uint), NULL, &ciErrNum);
    oclCheckError(ciErrNum, CL_SUCCESS);

    //Discard temp storage
    free(cScan);
}
// -------------------- extern "C" magma_int_t magma_queue_meminfo( magma_queue_t queue ) { cl_device_id dev; clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE, sizeof(cl_device_id), &dev, NULL); cl_ulong mem_size; clGetDeviceInfo(dev, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &mem_size, NULL); mem_size /= sizeof(magmaDoubleComplex); return mem_size; }
/*
 * Returns the version of the device a command queue is bound to, as reported
 * by clGetDeviceVersion. If the device cannot be queried, a version with
 * major and minor both set to 0 is returned.
 */
struct _cl_version clGetCommandQueueVersion(cl_command_queue command_queue)
{
    struct _cl_version fallback;
    cl_device_id queue_device = NULL;
    cl_int status;

    fallback.major = 0;
    fallback.minor = 0;

    status = clGetCommandQueueInfo(command_queue,
                                   CL_QUEUE_DEVICE,
                                   sizeof(cl_device_id),
                                   &queue_device,
                                   NULL);
    if(status == CL_SUCCESS){
        return clGetDeviceVersion(queue_device);
    }
    return fallback;
}
cl_context getQueueContext(cl_command_queue commandQueue, cl_int *error) { cl_int err; cl_context ctx = NULL; err = clGetCommandQueueInfo(commandQueue, CL_QUEUE_CONTEXT, sizeof(cl_context), &ctx, NULL); if (error != NULL) { *error = err; } return ctx; }
/*
 * Queries the device of a command queue.
 * On success the device is written to *device when device is not NULL.
 * Returns the status of clGetCommandQueueInfo.
 */
cl_int VISIBILITY_HIDDEN
getQueueDevice(cl_command_queue queue, cl_device_id *device)
{
    cl_device_id queried;
    cl_int status;

    status = clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE,
                                   sizeof(cl_device_id), &queried, NULL);

    /* the output is only touched when the query succeeded */
    if (status == CL_SUCCESS && device != NULL) {
        *device = queried;
    }

    return status;
}
/*
 * Queries the context of a command queue.
 * On success the context is written to *context when context is not NULL.
 * Returns the status of clGetCommandQueueInfo.
 */
cl_int VISIBILITY_HIDDEN
getQueueContext(cl_command_queue queue, cl_context *context)
{
    cl_context queried;
    cl_int status;

    status = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT,
                                   sizeof(cl_context), &queried, NULL);

    /* the output is only touched when the query succeeded */
    if (status == CL_SUCCESS && context != NULL) {
        *context = queried;
    }

    return status;
}
/*
 * Queries the properties bit-field of a command queue.
 * On success the value is written to *props when props is not NULL.
 * Returns the status of clGetCommandQueueInfo.
 */
cl_int VISIBILITY_HIDDEN
getQueueProperties(
    cl_command_queue queue,
    cl_command_queue_properties *props)
{
    cl_command_queue_properties queried;
    cl_int status;

    status = clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES,
                                   sizeof(cl_command_queue_properties),
                                   &queried, NULL);

    /* the output is only touched when the query succeeded */
    if (status == CL_SUCCESS && props != NULL) {
        *props = queried;
    }

    return status;
}
/**
 * Returns the command queue held by the context collection, creating the
 * collection first if it does not exist yet.
 *
 * The queue is validated by asking OpenCL for its context and comparing it
 * with the collection's context; on a failed query or a mismatch a warning
 * is logged and NULL is returned.
 */
cl_command_queue OclResourceServiceImpl::GetCommandQueue() const
{
  // make sure a context collection exists before touching its members
  if( ! m_ContextCollection )
  {
    m_ContextCollection = new OclContextCollection();
  }

  // ask the queue which context it belongs to
  cl_context queueContext;
  const cl_int queryStatus = clGetCommandQueueInfo( m_ContextCollection->m_CommandQueue, CL_QUEUE_CONTEXT, sizeof(queueContext), &queueContext, NULL );

  // the context is only compared when the query itself succeeded
  const bool queueIsValid = ( queryStatus == CL_SUCCESS ) && ( queueContext == m_ContextCollection->m_Context );
  if( queueIsValid )
  {
    return m_ContextCollection->m_CommandQueue;
  }

  MITK_WARN << "Have no valid command queue. Query returned : " << GetOclErrorAsString( queryStatus );
  return NULL;
}
cl_int clFFT_1DTwistPlannar(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag, size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir) { cl_fft_plan *plan = (cl_fft_plan *) Plan; unsigned int N = numRows*numCols; unsigned int nCols = numCols; unsigned int sRow = startRow; unsigned int rToProcess = rowsToProcess; int d = dir; int err = 0; cl_device_id device_id; err = clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device_id, NULL); if(err) return err; size_t gSize; err = clGetKernelWorkGroupInfo(plan->twist_kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &gSize, NULL); if(err) return err; gSize = min(128, gSize); size_t numGlobalThreads[1] = { max(numCols / gSize, 1)*gSize }; size_t numLocalThreads[1] = { gSize }; err |= clSetKernelArg(plan->twist_kernel, 0, sizeof(cl_mem), &array_real); err |= clSetKernelArg(plan->twist_kernel, 1, sizeof(cl_mem), &array_imag); err |= clSetKernelArg(plan->twist_kernel, 2, sizeof(unsigned int), &sRow); err |= clSetKernelArg(plan->twist_kernel, 3, sizeof(unsigned int), &nCols); err |= clSetKernelArg(plan->twist_kernel, 4, sizeof(unsigned int), &N); err |= clSetKernelArg(plan->twist_kernel, 5, sizeof(unsigned int), &rToProcess); err |= clSetKernelArg(plan->twist_kernel, 6, sizeof(int), &d); err |= clEnqueueNDRangeKernel(queue, plan->twist_kernel, 1, NULL, numGlobalThreads, numLocalThreads, 0, NULL, NULL); return err; }
/*
 * Prints work-group related information about a kernel for the device that
 * the given command queue is bound to: the function name, the work-group
 * size, the preferred work-group size multiple, and the local and private
 * memory sizes. Every OpenCL call goes through CALL_CL_SAFE.
 */
void print_kernel_info(cl_command_queue queue, cl_kernel knl)
{
  // get device associated with the queue
  cl_device_id dev;
  CALL_CL_SAFE(clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE,
        sizeof(dev), &dev, NULL));

  char kernel_name[4096];
  CALL_CL_SAFE(clGetKernelInfo(knl, CL_KERNEL_FUNCTION_NAME,
        sizeof(kernel_name), &kernel_name, NULL));
  // force termination regardless of what the query wrote
  kernel_name[4095] = '\0';
  printf("Info for kernel %s:\n", kernel_name);

  // size_t values are unsigned, so they are printed with %zu
  size_t kernel_work_group_size;
  CALL_CL_SAFE(clGetKernelWorkGroupInfo(knl, dev, CL_KERNEL_WORK_GROUP_SIZE,
        sizeof(kernel_work_group_size), &kernel_work_group_size, NULL));
  printf(" CL_KERNEL_WORK_GROUP_SIZE=%zu\n", kernel_work_group_size);

  size_t preferred_work_group_size_multiple;
  CALL_CL_SAFE(clGetKernelWorkGroupInfo(knl, dev,
        CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
        sizeof(preferred_work_group_size_multiple),
        &preferred_work_group_size_multiple, NULL));
  printf(" CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE=%zu\n",
      preferred_work_group_size_multiple);

  cl_ulong kernel_local_mem_size;
  CALL_CL_SAFE(clGetKernelWorkGroupInfo(knl, dev, CL_KERNEL_LOCAL_MEM_SIZE,
        sizeof(kernel_local_mem_size), &kernel_local_mem_size, NULL));
  printf(" CL_KERNEL_LOCAL_MEM_SIZE=%llu\n",
      (long long unsigned int)kernel_local_mem_size);

  cl_ulong kernel_private_mem_size;
  CALL_CL_SAFE(clGetKernelWorkGroupInfo(knl, dev, CL_KERNEL_PRIVATE_MEM_SIZE,
        sizeof(kernel_private_mem_size), &kernel_private_mem_size, NULL));
  printf(" CL_KERNEL_PRIVATE_MEM_SIZE=%llu\n",
      (long long unsigned int)kernel_private_mem_size);
}
/*
 * clEnqueueFillBuffer
 *
 * Substitute implementation that fills a buffer with a repeated pattern by
 * enqueueing a small generated kernel.
 *
 * Only patterns whose size equals sizeof(cl_char), sizeof(cl_short),
 * sizeof(cl_int) or sizeof(cl_long) are accepted; 'offset' and 'size' must
 * both be multiples of 'pattern_size', otherwise CL_INVALID_VALUE is
 * returned. The generated program is cached (under a mutex) for the most
 * recently used context and rebuilt when a different context shows up.
 *
 * The per-call kernel object is handed to pgstromEnqueueFillBufferCleanup
 * through an event callback registered for CL_COMPLETE. 'event' may be NULL;
 * in that case an internal event is used for the callback registration and
 * released again before returning.
 *
 * Returns CL_SUCCESS, or the error code of the first OpenCL call that failed.
 */
cl_int
clEnqueueFillBuffer(cl_command_queue command_queue,
					cl_mem buffer,
					const void *pattern,
					size_t pattern_size,
					size_t offset,
					size_t size,
					cl_uint num_events_in_wait_list,
					const cl_event *event_wait_list,
					cl_event *event)
{
	static pthread_mutex_t	lock = PTHREAD_MUTEX_INITIALIZER;
	static cl_program		last_program = NULL;
	static cl_context		last_context = NULL;
	cl_kernel		kernel;
	cl_program		program;
	cl_context		context;
	cl_device_id	device;
	cl_event		local_event;
	cl_event	   *kernel_event;
	size_t			gworksz;
	size_t			lworksz;
	char			kernel_name[80];
	cl_int			rc;
	union {
		cl_char		v_char;
		cl_short	v_short;
		cl_int		v_int;
		cl_long		v_long;
	} pattern_value;
	cl_uint			pattern_nums;

	switch (pattern_size)
	{
		case sizeof(cl_char):
			pattern_value.v_char = *((cl_char *)pattern);
			break;
		case sizeof(cl_short):
			pattern_value.v_short = *((cl_short *)pattern);
			break;
		case sizeof(cl_int):
			pattern_value.v_int = *((cl_int *)pattern);
			break;
		case sizeof(cl_long):
			pattern_value.v_long = *((cl_long *)pattern);
			break;
		default:
			/*
			 * pattern_size was not support one, even though OpenCL 1.2
			 * spec says 16, 32, 64 or 128 bytes patterns are supported.
			 */
			return CL_INVALID_VALUE;
	}

	/* ensure alignment */
	if (offset % pattern_size != 0)
		return CL_INVALID_VALUE;
	if (size % pattern_size != 0)
		return CL_INVALID_VALUE;

	/* fetch context and device_id associated with this command queue */
	rc = clGetCommandQueueInfo(command_queue,
							   CL_QUEUE_CONTEXT,
							   sizeof(cl_context),
							   &context,
							   NULL);
	if (rc != CL_SUCCESS)
		return rc;

	pthread_mutex_lock(&lock);
	if (last_program && last_context == context)
	{
		/* cache hit: take one more reference for this call */
		rc = clRetainProgram(last_program);
		if (rc != CL_SUCCESS)
			goto out_unlock;
		program = last_program;
	}
	else
	{
		char			source[10240];
		const char	   *prog_source[1];
		size_t			prog_length[1];
		cl_uint			num_devices;
		cl_device_id   *device_ids;
		static struct {
			const char *type_name;
			size_t		type_size;
		} pattern_types[] = {
			{ "char",  sizeof(cl_char) },
			{ "short", sizeof(cl_short) },
			{ "int",   sizeof(cl_int) },
			{ "long",  sizeof(cl_long) },
		};
		size_t			i, ofs;

		/* fetch properties of cl_context */
		rc = clGetContextInfo(context,
							  CL_CONTEXT_NUM_DEVICES,
							  sizeof(cl_uint),
							  &num_devices,
							  NULL);
		if (rc != CL_SUCCESS)
			goto out_unlock;
		Assert(num_devices > 0);

		device_ids = calloc(num_devices, sizeof(cl_device_id));
		if (!device_ids)
		{
			rc = CL_OUT_OF_HOST_MEMORY;
			goto out_unlock;
		}
		rc = clGetContextInfo(context,
							  CL_CONTEXT_DEVICES,
							  sizeof(cl_device_id) * num_devices,
							  device_ids,
							  NULL);
		if (rc != CL_SUCCESS)
		{
			free(device_ids);
			goto out_unlock;
		}

		/* release the previous program */
		if (last_program)
		{
			rc = clReleaseProgram(last_program);
			Assert(rc == CL_SUCCESS);
			last_program = NULL;
			last_context = NULL;
		}

		/* create a program object: one kernel per supported pattern size */
		for (i=0, ofs=0; i < lengthof(pattern_types); i++)
		{
			ofs += snprintf(
				source + ofs, sizeof(source) - ofs,
				"__kernel void\n"
				"pgstromEnqueueFillBuffer_%zu(__global %s *buffer,\n"
				" %s value, uint nums)\n"
				"{\n"
				" if (get_global_id(0) >= nums)\n"
				" return;\n"
				" buffer[get_global_id(0)] = value;\n"
				"}\n",
				pattern_types[i].type_size,
				pattern_types[i].type_name,
				pattern_types[i].type_name);
		}
		prog_source[0] = source;
		prog_length[0] = ofs;
		program = clCreateProgramWithSource(context,
											1,
											prog_source,
											prog_length,
											&rc);
		if (rc != CL_SUCCESS)
		{
			free(device_ids);
			goto out_unlock;
		}

		/* build this program object */
		rc = clBuildProgram(program,
							num_devices,
							device_ids,
							NULL,
							NULL,
							NULL);
		free(device_ids);
		if (rc != CL_SUCCESS)
		{
			clReleaseProgram(program);
			goto out_unlock;
		}

		/* acquire the program object: one reference for the cache */
		rc = clRetainProgram(program);
		if (rc != CL_SUCCESS)
		{
			clReleaseProgram(program);
			goto out_unlock;
		}
		last_program = program;
		last_context = context;
	}
	pthread_mutex_unlock(&lock);
	Assert(program != NULL);

	/* fetch a device id of this command queue */
	rc = clGetCommandQueueInfo(command_queue,
							   CL_QUEUE_DEVICE,
							   sizeof(cl_device_id),
							   &device,
							   NULL);
	if (rc != CL_SUCCESS)
		goto out_release_program;

	/* fetch a kernel object to be called */
	snprintf(kernel_name, sizeof(kernel_name),
			 "pgstromEnqueueFillBuffer_%zu", pattern_size);
	kernel = clCreateKernel(program,
							kernel_name,
							&rc);
	if (rc != CL_SUCCESS)
		goto out_release_program;

	/* 1st arg: __global <typename> *buffer */
	rc = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer);
	if (rc != CL_SUCCESS)
		goto out_release_kernel;

	/* 2nd arg: <typename> value */
	rc = clSetKernelArg(kernel, 1, pattern_size, &pattern_value);
	if (rc != CL_SUCCESS)
		goto out_release_kernel;

	/* 3rd arg: size_t nums */
	pattern_nums = (offset + size) / pattern_size;
	rc = clSetKernelArg(kernel, 2, sizeof(cl_uint), &pattern_nums);
	if (rc != CL_SUCCESS)
		goto out_release_kernel;

	/* calculate optimal workgroup size */
	rc = clGetKernelWorkGroupInfo(kernel,
								  device,
								  CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
								  sizeof(size_t),
								  &lworksz,
								  NULL);
	/* lworksz is only meaningful when the query succeeded */
	if (rc != CL_SUCCESS)
		goto out_release_kernel;
	Assert((lworksz & (lworksz - 1)) == 0);
	gworksz = ((size / pattern_size + lworksz - 1) / lworksz) * lworksz;

	/*
	 * The completion callback needs an event object even when the caller
	 * is not interested in one, so fall back to a local event.
	 */
	kernel_event = (event != NULL ? event : &local_event);

	/* enqueue a kernel, instead of clEnqueueFillBuffer */
	offset /= pattern_size;
	rc = clEnqueueNDRangeKernel(command_queue,
								kernel,
								1,
								&offset,
								&gworksz,
								&lworksz,
								num_events_in_wait_list,
								event_wait_list,
								kernel_event);
	if (rc != CL_SUCCESS)
		goto out_release_kernel;

	rc = clSetEventCallback(*kernel_event,
							CL_COMPLETE,
							pgstromEnqueueFillBufferCleanup,
							kernel);
	if (rc != CL_SUCCESS)
	{
		/* no callback will clean up; wait until the kernel is done */
		clWaitForEvents(1, kernel_event);
		if (event == NULL)
			clReleaseEvent(local_event);
		goto out_release_kernel;
	}

	/* drop the internal event; the caller did not ask for one */
	if (event == NULL)
		clReleaseEvent(local_event);

	return CL_SUCCESS;

out_unlock:
	pthread_mutex_unlock(&lock);
	return rc;

out_release_kernel:
	clReleaseKernel(kernel);
out_release_program:
	clReleaseProgram(program);
	return rc;
}
void OCLUtil::Init() { cl_int error = 0; // Used to handle error codes cl_uint numberOfPlatforms; cl_uint nplatforms; cl_device_id device; size_t length; createContextSuccess = false; createCommandQueueSuccess = false; openclModule = 0; // Load OpenCL library openclModule = LoadLibrary(TEXT("OpenCL.dll")); if (!openclModule) { DEBUG_LOG_ERROR("Init", "Load OpenCL.dll failed."); return; } // Initialize function entries #define INITIALIZE_FUNCTION_ENTRY(name) checkFunction(__##name = (name##Function) GetProcAddress(openclModule, #name)); OPENCL_FUNCTION_LIST(INITIALIZE_FUNCTION_ENTRY) #undef INITIALIZE_FUNCTION_ENTRY openclFlag = true; // Platform error = clGetPlatformIDs( 0, 0, &nplatforms); if (error != CL_SUCCESS) { DEBUG_LOG_ERROR("Init", "Get platform number error: " << error); return; } cl_platform_id* m_platforms = new cl_platform_id[nplatforms]; error = clGetPlatformIDs(nplatforms, m_platforms, &numberOfPlatforms); if (error != CL_SUCCESS) { DEBUG_LOG_ERROR("Init", "Get platform id error: " << error); delete [] m_platforms; return; } const cl_uint maxNameLength = 256; char name[maxNameLength]; for (cl_uint i = 0; i < numberOfPlatforms; i++) { error = clGetPlatformInfo(m_platforms[i], CL_PLATFORM_NAME, maxNameLength * sizeof(char), name, 0); if (error != CL_SUCCESS) { DEBUG_LOG_ERROR("Init", "Get platform name error: " << error); } else { if (!strcmp(name, "Intel(R) OpenCL") || !strcmp(name, "AMD Accelerated Parallel Processing")) { platform_ = m_platforms[i]; break; } } } delete [] m_platforms; if (!platform_) { DEBUG_LOG_ERROR("Init", "Find Intel or AMD platform failed."); return; } // Version char* temp; error = getPlatformPropertyHelper(CL_PLATFORM_VERSION, temp); if (error != CL_SUCCESS) { DEBUG_LOG_ERROR("Init", "Get platform version error: " << error); } else { version_ = std::string(temp); delete [] temp; } // Name error = getPlatformPropertyHelper(CL_PLATFORM_NAME, temp); if (error != CL_SUCCESS) { DEBUG_LOG_ERROR("Init", "Get platform name error: " 
<< error); } else { name_ = std::string(temp); delete [] temp; } // Vendor error = getPlatformPropertyHelper(CL_PLATFORM_VENDOR, temp); if (error != CL_SUCCESS) { DEBUG_LOG_ERROR("Init", "Get platform vendor error: " << error); } else { vendor_ = std::string(temp); delete [] temp; } // Profile error = getPlatformPropertyHelper(CL_PLATFORM_PROFILE, temp); if (error != CL_SUCCESS) { DEBUG_LOG_ERROR("Init", "Get platform profile error: " << error); } else { profile_ = std::string(temp); delete [] temp; } // Platform Extensions error = getPlatformPropertyHelper(CL_PLATFORM_EXTENSIONS, temp); if (error != CL_SUCCESS) { DEBUG_LOG_ERROR("Init", "Get platform extension error: " << error); } else { platformExtensions_ = std::string(temp); delete [] temp; } // Number of Device cl_uint number; error = clGetDeviceIDs(platform_, CL_DEVICE_TYPE_ALL, 0, 0, &number); if (error != CL_SUCCESS) { DEBUG_LOG_ERROR("Init", "Get device number error: " << error); } else { numberOfDevices_ = number; } // Context cl_context_properties context_properties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform_, 0}; context_ = clCreateContextFromType(context_properties, CL_DEVICE_TYPE_CPU, &reportCLError, this, &error); if (error != CL_SUCCESS) { DEBUG_LOG_ERROR("Init", "Create context error: " << error); return; } createContextSuccess = true; // Device size_t cb; error = clGetContextInfo(context_, CL_CONTEXT_DEVICES, 0, 0, &cb); if (error != CL_SUCCESS) { DEBUG_LOG_ERROR("Init", "Get context device number error: " << error); return; } cl_device_id* devices = (cl_device_id*)malloc(sizeof(cl_device_id) * cb); if (!devices) { DEBUG_LOG_STATUS("Init", "Cannot allocate device list"); return; } error = clGetContextInfo(context_, CL_CONTEXT_DEVICES, cb, devices, 0); if (error != CL_SUCCESS) { DEBUG_LOG_ERROR("Init", "Get context device info error: " << error); free(devices); return; } // Command Queue queue_ = clCreateCommandQueue(context_, devices[0], #ifdef CLPROFILE CL_QUEUE_PROFILING_ENABLE 
| #endif // CLPROFILE #ifdef OUTOFORDERQUEUE CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | #endif // OUTOFORDERQUEUE 0, &error); if (error != CL_SUCCESS) { DEBUG_LOG_ERROR("Init", "Create command queue error: " << error); free(devices); return; } DEBUG_LOG_STATUS("Init", "queue is " << queue_); createCommandQueueSuccess = true; error = clGetDeviceInfo(devices[0], CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(alignmentSize_), &alignmentSize_, 0); if (error != CL_SUCCESS) { // We can tolerate this, simply do not align. alignmentSize_ = 8; } // We use byte, not bits. if (alignmentSize_ % 8) { // They align on sub-byte borders? Odd architecture this must be. Give up. alignmentSize_ = 1; } else { alignmentSize_ = alignmentSize_ / 8; } // Device Extensions error = clGetCommandQueueInfo(queue_, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device, 0); if (error != CL_SUCCESS) { DEBUG_LOG_ERROR("Init", "Get command queue device error: " << error); } else { error = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, 0, &length); if (error == CL_SUCCESS) { temp = new char[length+1]; error = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, length, temp, 0); deviceExtensions_ = std::string(temp); delete [] temp; } else { DEBUG_LOG_ERROR("Init", "Get device extension error: " << error); } } temp = 0; free(devices); }
END_TEST START_TEST (test_get_command_queue_info) { cl_platform_id platform = 0; cl_device_id device; cl_context ctx; cl_command_queue queue; cl_int result; union { cl_context ctx; cl_device_id device; cl_uint refcount; cl_command_queue_properties properties; } info; result = clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, 0); fail_if( result != CL_SUCCESS, "unable to get the default device" ); ctx = clCreateContext(0, 1, &device, 0, 0, &result); fail_if( result != CL_SUCCESS || ctx == 0, "unable to create a valid context" ); queue = clCreateCommandQueue(ctx, device, 0, &result); fail_if( result != CL_SUCCESS || queue == 0, "cannot create a command queue" ); result = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), (void *)&info, 0); fail_if( result != CL_SUCCESS || info.ctx != ctx, "the queue doesn't retain its context" ); result = clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE, sizeof(cl_device_id), (void *)&info, 0); fail_if( result != CL_SUCCESS || info.device != device, "the queue doesn't retain its device" ); result = clGetCommandQueueInfo(queue, CL_QUEUE_REFERENCE_COUNT, sizeof(cl_uint), (void *)&info, 0); fail_if( result != CL_SUCCESS || info.refcount != 1, "the queue must have a refcount of 1 when it's created" ); result = clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), (void *)&info, 0); fail_if( result != CL_SUCCESS || info.properties != 0, "we gave no properties to the command queue" ); clReleaseCommandQueue(queue); clReleaseContext(ctx); }
clrngStatus clrngMrg32k3aDeviceRandomU01Array_(size_t streamCount, cl_mem streams, size_t numberCount, cl_mem outBuffer, cl_uint numQueuesAndEvents, cl_command_queue* commQueues, cl_uint numWaitEvents, const cl_event* waitEvents, cl_event* outEvents, cl_bool singlePrecision) { //Check params if (streamCount < 1) return clrngSetErrorString(CLRNG_INVALID_VALUE, "%s(): streamCount cannot be less than 1", __func__); if (streams == NULL) return clrngSetErrorString(CLRNG_INVALID_VALUE, "%s(): stream_array cannot be NULL", __func__); if (numberCount < 1) return clrngSetErrorString(CLRNG_INVALID_VALUE, "%s(): numberCount cannot be less than 1", __func__); if (outBuffer == NULL) return clrngSetErrorString(CLRNG_INVALID_VALUE, "%s(): buffer cannot be NULL", __func__); if (commQueues == NULL) return clrngSetErrorString(CLRNG_INVALID_VALUE, "%s(): commQueues cannot be NULL", __func__); if (numberCount % streamCount != 0) return clrngSetErrorString(CLRNG_INVALID_VALUE, "%s(): numberCount must be a multiple of streamCount", __func__); if (numQueuesAndEvents != 1) return clrngSetErrorString(CLRNG_INVALID_VALUE, "%s(): numQueuesAndEvents can only have the value '1'", __func__); //*************************************************************************************** //Get the context cl_int err; cl_context ctx; err = clGetCommandQueueInfo(commQueues[0], CL_QUEUE_CONTEXT, sizeof(cl_context), &ctx, NULL); if (err != CLRNG_SUCCESS) return clrngSetErrorString(err, "%s(): cannot retrieve context", __func__); //Get the Device cl_device_id dev; err = clGetCommandQueueInfo(commQueues[0], CL_QUEUE_DEVICE, sizeof(cl_device_id), &dev, NULL); if (err != CLRNG_SUCCESS) return clrngSetErrorString(err, "%s(): cannot retrieve the device", __func__); //create the program const char *sources[4] = { singlePrecision ? 
"#define CLRNG_SINGLE_PRECISION\n" : "", "#include <mrg32k3a.clh>\n" "__kernel void fillBufferU01(__global clrngMrg32k3aHostStream* streams, uint numberCount, __global ", singlePrecision ? "float" : "double", "* numbers) {\n" " int gid = get_global_id(0);\n" " int gsize = get_global_size(0);\n" " //Copy a stream from global stream array to local stream struct\n" " clrngMrg32k3aStream local_stream;\n" " clrngMrg32k3aCopyOverStreamsFromGlobal(1, &local_stream, &streams[gid]);\n" " // wavefront-friendly ordering\n" " for (int i = 0; i < numberCount; i++)\n" " numbers[i * gsize + gid] = clrngMrg32k3aRandomU01(&local_stream);\n" "}\n" }; cl_program program = clCreateProgramWithSource(ctx, 4, sources, NULL, &err); if (err != CLRNG_SUCCESS) return clrngSetErrorString(err, "%s(): cannot create program", __func__); // construct compiler options const char* includes = clrngGetLibraryDeviceIncludes(&err); if (err != CLRNG_SUCCESS) return (clrngStatus)err; err = clBuildProgram(program, 0, NULL, includes, NULL, NULL); if (err < 0) { // Find size of log and print to std output char *program_log; size_t log_size; clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); program_log = (char *)malloc(log_size + 1); program_log[log_size] = '\0'; clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, log_size + 1, program_log, NULL); printf("clBuildProgram fails:\n%s\n", program_log); free(program_log); exit(1); } // Create the kernel cl_kernel kernel = clCreateKernel(program, "fillBufferU01", &err); if (err != CLRNG_SUCCESS) return clrngSetErrorString(err, "%s(): cannot create kernel", __func__); //*************************************************************************************** //Random numbers generated by each work-item cl_uint number_count_per_stream = numberCount / streamCount; //Work Group Size (local_size) size_t local_size; err = clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(local_size), &local_size, NULL); if (err != CLRNG_SUCCESS) 
return clrngSetErrorString(err, "%s(): cannot read CL_DEVICE_MAX_WORK_GROUP_SIZE", __func__); if (local_size > streamCount) local_size = streamCount; // Set kernel arguments for kernel and enqueue that kernel. err = clSetKernelArg(kernel, 0, sizeof(streams), &streams); err |= clSetKernelArg(kernel, 1, sizeof(number_count_per_stream), &number_count_per_stream); err |= clSetKernelArg(kernel, 2, sizeof(outBuffer), &outBuffer); if (err != CLRNG_SUCCESS) return clrngSetErrorString(err, "%s(): cannot create kernel arguments", __func__); // Enqueue kernel err = clEnqueueNDRangeKernel(commQueues[0], kernel, 1, NULL, &streamCount, &local_size, numWaitEvents, waitEvents, outEvents); if (err != CLRNG_SUCCESS) return clrngSetErrorString(err, "%s(): cannot enqueue kernel", __func__); clReleaseKernel(kernel); clReleaseProgram(program); return(clrngStatus)EXIT_SUCCESS; }
PIGLIT_CL_API_TEST_CONFIG_END enum piglit_result piglit_cl_test(const int argc, const char** argv, const struct piglit_cl_api_test_config* config, const struct piglit_cl_api_test_env* env) { enum piglit_result result = PIGLIT_PASS; int i; cl_int errNo; cl_command_queue command_queue = env->context->command_queues[0]; size_t param_value_size; void* param_value; int num_command_queue_infos = PIGLIT_CL_ENUM_NUM(cl_command_queue_info, env->version); const cl_command_queue_info *command_queue_infos = PIGLIT_CL_ENUM_ARRAY(cl_command_queue_info); /*** Normal usage ***/ for(i = 0; i < num_command_queue_infos; i++) { printf("%s ", piglit_cl_get_enum_name(command_queue_infos[i])); errNo = clGetCommandQueueInfo(command_queue, command_queue_infos[i], 0, NULL, ¶m_value_size); if(!piglit_cl_check_error(errNo, CL_SUCCESS)) { fprintf(stderr, "Failed (error code: %s): Get size of %s.\n", piglit_cl_get_error_name(errNo), piglit_cl_get_enum_name(command_queue_infos[i])); piglit_merge_result(&result, PIGLIT_FAIL); continue; } param_value = malloc(param_value_size); errNo = clGetCommandQueueInfo(command_queue, command_queue_infos[i], param_value_size, param_value, NULL); if(!piglit_cl_check_error(errNo, CL_SUCCESS)) { fprintf(stderr, "Failed (error code: %s): Get value of %s.\n", piglit_cl_get_error_name(errNo), piglit_cl_get_enum_name(command_queue_infos[i])); piglit_merge_result(&result, PIGLIT_FAIL); } //TODO: output returned values printf("\n"); free(param_value); } /*** Errors ***/ /* * CL_INVALID_VALUE if param_name is not one of the supported * values or if size in bytes specified by param_value_size is * less than size of return type and param_value is not a NULL * value. 
*/ errNo = clGetCommandQueueInfo(command_queue, CL_PLATFORM_NAME, 0, NULL, ¶m_value_size); if(!piglit_cl_check_error(errNo, CL_INVALID_VALUE)) { fprintf(stderr, "Failed (error code: %s): Trigger CL_INVALID_VALUE if param_name is not one of the supported values.\n", piglit_cl_get_error_name(errNo)); piglit_merge_result(&result, PIGLIT_FAIL); } errNo = clGetCommandQueueInfo(command_queue, CL_QUEUE_REFERENCE_COUNT, 1, param_value, NULL); if(!piglit_cl_check_error(errNo, CL_INVALID_VALUE)) { fprintf(stderr, "Failed (error code: %s): Trigger CL_INVALID_VALUE if size in bytes specified by param_value is less than size of return type and param_value is not a NULL value.\n", piglit_cl_get_error_name(errNo)); piglit_merge_result(&result, PIGLIT_FAIL); } /* * CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command queue. */ errNo = clGetCommandQueueInfo(NULL, CL_QUEUE_CONTEXT, 0, NULL, ¶m_value_size); if(!piglit_cl_check_error(errNo, CL_INVALID_COMMAND_QUEUE)) { fprintf(stderr, "Failed (error code: %s): Trigger CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command queue.\n", piglit_cl_get_error_name(errNo)); piglit_merge_result(&result, PIGLIT_FAIL); } return result; }
static void* piglit_cl_get_info(void* fn_ptr, void* obj, cl_uint param) { cl_int errNo; size_t param_size; void* param_ptr = NULL; /* get param size */ if(fn_ptr == clGetPlatformInfo) { errNo = clGetPlatformInfo(*(cl_platform_id*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetDeviceInfo) { errNo = clGetDeviceInfo(*(cl_device_id*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetContextInfo) { errNo = clGetContextInfo(*(cl_context*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetCommandQueueInfo) { errNo = clGetCommandQueueInfo(*(cl_command_queue*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetMemObjectInfo) { errNo = clGetMemObjectInfo(*(cl_mem*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetImageInfo) { errNo = clGetImageInfo(*(cl_mem*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetSamplerInfo) { errNo = clGetSamplerInfo(*(cl_sampler*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetProgramInfo) { errNo = clGetProgramInfo(*(cl_program*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetProgramBuildInfo) { errNo = clGetProgramBuildInfo(((struct _program_build_info_args*)obj)->program, ((struct _program_build_info_args*)obj)->device, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetKernelInfo) { errNo = clGetKernelInfo(*(cl_kernel*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetKernelWorkGroupInfo) { errNo = clGetKernelWorkGroupInfo(((struct _kernel_work_group_info_args*)obj)->kernel, ((struct _kernel_work_group_info_args*)obj)->device, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetEventInfo) { errNo = clGetEventInfo(*(cl_event*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetEventProfilingInfo) { errNo = clGetEventProfilingInfo(*(cl_event*)obj, param, 0, NULL, ¶m_size); } else { fprintf(stderr, "Trying to get %s information from undefined function.\n", piglit_cl_get_enum_name(param)); piglit_report_result(PIGLIT_FAIL); } if(errNo == CL_SUCCESS) { param_ptr = calloc(param_size, 
sizeof(char)); /* retrieve param */ if(fn_ptr == clGetPlatformInfo) { errNo = clGetPlatformInfo(*(cl_platform_id*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetDeviceInfo) { errNo = clGetDeviceInfo(*(cl_device_id*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetContextInfo) { errNo = clGetContextInfo(*(cl_context*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetCommandQueueInfo) { errNo = clGetCommandQueueInfo(*(cl_command_queue*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetMemObjectInfo) { errNo = clGetMemObjectInfo(*(cl_mem*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetImageInfo) { errNo = clGetImageInfo(*(cl_mem*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetSamplerInfo) { errNo = clGetSamplerInfo(*(cl_sampler*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetProgramInfo) { errNo = clGetProgramInfo(*(cl_program*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetProgramBuildInfo) { errNo = clGetProgramBuildInfo(((struct _program_build_info_args*)obj)->program, ((struct _program_build_info_args*)obj)->device, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetKernelInfo) { errNo = clGetKernelInfo(*(cl_kernel*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetKernelWorkGroupInfo) { errNo = clGetKernelWorkGroupInfo(((struct _kernel_work_group_info_args*)obj)->kernel, ((struct _kernel_work_group_info_args*)obj)->device, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetEventInfo) { errNo = clGetEventInfo(*(cl_event*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetEventProfilingInfo) { errNo = clGetEventProfilingInfo(*(cl_event*)obj, param, param_size, param_ptr, NULL); } if(errNo != CL_SUCCESS) { free(param_ptr); param_ptr = NULL; } } if(param_ptr == NULL) { fprintf(stderr, "Unable to get %s information (Error: %s)\n", piglit_cl_get_enum_name(param), 
piglit_cl_get_error_name(errNo)); piglit_report_result(PIGLIT_FAIL); } return param_ptr; }
// the process code that the host sees static OfxStatus render( OfxImageEffectHandle instance, OfxPropertySetHandle inArgs, OfxPropertySetHandle outArgs) { // get the render window and the time from the inArgs OfxTime time; OfxRectI renderWindow; OfxStatus status = kOfxStatOK; gPropHost->propGetDouble(inArgs, kOfxPropTime, 0, &time); gPropHost->propGetIntN(inArgs, kOfxImageEffectPropRenderWindow, 4, &renderWindow.x1); // Retrieve instance data associated with this effect MyInstanceData *myData = getMyInstanceData(instance); // property handles and members of each image OfxPropertySetHandle sourceImg = NULL, outputImg = NULL; int srcRowBytes, srcBitDepth, dstRowBytes, dstBitDepth; bool srcIsAlpha, dstIsAlpha; OfxRectI dstRect, srcRect; void *src, *dst; DPRINT(("Render: window = [%d, %d - %d, %d]\n", renderWindow.x1, renderWindow.y1, renderWindow.x2, renderWindow.y2)); int isOpenCLEnabled = 0; if (gHostSupportsOpenCL) { gPropHost->propGetInt(inArgs, kOfxImageEffectPropOpenCLEnabled, 0, &isOpenCLEnabled); DPRINT(("render: OpenCL rendering %s\n", isOpenCLEnabled ? 
"enabled" : "DISABLED")); } cl_context clContext = NULL; cl_command_queue cmdQ = NULL; cl_device_id deviceId = NULL; if (isOpenCLEnabled) { void* voidPtrCmdQ; gPropHost->propGetPointer(inArgs, kOfxImageEffectPropOpenCLCommandQueue, 0, &voidPtrCmdQ); cmdQ = reinterpret_cast<cl_command_queue>(voidPtrCmdQ); clGetCommandQueueInfo(cmdQ, CL_QUEUE_CONTEXT, sizeof(cl_context), &clContext, NULL); clGetCommandQueueInfo(cmdQ, CL_QUEUE_DEVICE, sizeof(cl_device_id), &deviceId, NULL); } else { clContext = GetContext(deviceId); cmdQ = clCreateCommandQueue(clContext, deviceId, 0, NULL); } char deviceName[128]; clGetDeviceInfo(deviceId, CL_DEVICE_NAME, 128, deviceName, NULL); DPRINT(("Using %s for plugin\n", deviceName)); cl_kernel kernel = GetKernel(clContext); // get the source image sourceImg = ofxuGetImage(myData->sourceClip, time, srcRowBytes, srcBitDepth, srcIsAlpha, srcRect, src); // get the output image outputImg = ofxuGetImage(myData->outputClip, time, dstRowBytes, dstBitDepth, dstIsAlpha, dstRect, dst); // get the scale parameter double rGain = 1, gGain = 1, bGain = 1; gParamHost->paramGetValueAtTime(myData->rGainParam, time, &rGain); gParamHost->paramGetValueAtTime(myData->gGainParam, time, &gGain); gParamHost->paramGetValueAtTime(myData->bGainParam, time, &bGain); DPRINT(("Gain(%f %f %f)\n", rGain, gGain, bGain)); float w = (renderWindow.x2 - renderWindow.x1); float h = (renderWindow.y2 - renderWindow.y1); const size_t rowSize = w * 4 * sizeof(float); if (isOpenCLEnabled) { DPRINT(("Using OpenCL transfers (same device)\n")); RunKernel(cmdQ, deviceId, kernel, w, h, rGain, gGain, bGain, (cl_mem)src, (cl_mem)dst); } else { DPRINT(("Using CPU transfers\n")); const size_t bufferSize = w * h * 4 * sizeof(float); // Allocate the temporary buffers on the plugin device cl_mem inBuffer = clCreateBuffer(clContext, CL_MEM_READ_ONLY, bufferSize, NULL, NULL); cl_mem outBuffer = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY, bufferSize, NULL, NULL); // Copy the buffer from the CPU to 
the plugin device clEnqueueWriteBuffer(cmdQ, inBuffer, CL_TRUE, 0, bufferSize, src, 0, NULL, NULL); RunKernel(cmdQ, deviceId, kernel, w, h, rGain, gGain, bGain, inBuffer, outBuffer); // Copy the buffer from the plugin device to the CPU clEnqueueReadBuffer(cmdQ, outBuffer, CL_TRUE, 0, bufferSize, dst, 0, NULL, NULL); clFinish(cmdQ); // Free the temporary buffers on the plugin device clReleaseMemObject(inBuffer); clReleaseMemObject(outBuffer); } if (sourceImg) { gEffectHost->clipReleaseImage(sourceImg); } if (outputImg) { gEffectHost->clipReleaseImage(outputImg); } return status; }