// Main function // ********************************************************************* int main(int argc, char **argv) { void *srcA, *srcB, *dst; // Host buffers for OpenCL test cl_context cxGPUContext; // OpenCL context cl_command_queue cqCommandQue; // OpenCL command que cl_device_id* cdDevices; // OpenCL device list cl_program cpProgram; // OpenCL program cl_kernel ckKernel; // OpenCL kernel cl_mem cmMemObjs[3]; // OpenCL memory buffer objects: 3 for device size_t szGlobalWorkSize[1]; // 1D var for Total # of work items size_t szLocalWorkSize[1]; // 1D var for # of work items in the work group size_t szParmDataBytes; // Byte size of context information cl_int ciErr1, ciErr2; // Error code var int iTestN = 100000 * 8; // Size of Vectors to process int actualGlobalSize = iTestN>>3; // set Global and Local work size dimensions szGlobalWorkSize[0] = iTestN >> 3; // do 8 computations per work item szLocalWorkSize[0]= iTestN>>3; // Allocate and initialize host arrays srcA = (void *)malloc (sizeof(cl_float) * iTestN); srcB = (void *)malloc (sizeof(cl_float) * iTestN); dst = (void *)malloc (sizeof(cl_float) * iTestN); int i; // Initialize arrays with some values for (i=0;i<iTestN;i++) { ((cl_float*)srcA)[i] = cl_float(i); ((cl_float*)srcB)[i] = 2; ((cl_float*)dst)[i]=-1; } cl_uint numPlatforms; cl_platform_id platform = NULL; cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms); if (0 < numPlatforms) { cl_platform_id* platforms = new cl_platform_id[numPlatforms]; status = clGetPlatformIDs(numPlatforms, platforms, NULL); for (unsigned i = 0; i < numPlatforms; ++i) { char pbuf[100]; status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL); platform = platforms[i]; if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) { break; } } delete[] platforms; } cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; // Create OpenCL context & context cxGPUContext = clCreateContextFromType(cps, CL_DEVICE_TYPE_ALL, NULL, NULL, &ciErr1); //could also be CL_DEVICE_TYPE_GPU // Query all devices available to the context ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); cdDevices = (cl_device_id*)malloc(szParmDataBytes); ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); if (cdDevices) { printDevInfo(cdDevices[0]); } // Create a command queue for first device the context reported cqCommandQue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, &ciErr2); ciErr1 |= ciErr2; // Allocate the OpenCL source and result buffer memory objects on the device GMEM cmMemObjs[0] = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float8) * szGlobalWorkSize[0], srcA, &ciErr2); ciErr1 |= ciErr2; cmMemObjs[1] = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float8) * szGlobalWorkSize[0], srcB, &ciErr2); ciErr1 |= ciErr2; cmMemObjs[2] = clCreateBuffer(cxGPUContext, CL_MEM_WRITE_ONLY, sizeof(cl_float8) * szGlobalWorkSize[0], NULL, &ciErr2); ciErr1 |= ciErr2; ///create kernels from binary int numDevices = 1; ::size_t* lengths = (::size_t*) malloc(numDevices * sizeof(::size_t)); const unsigned char** images = (const unsigned char**) malloc(numDevices * sizeof(const void*)); for (i = 0; i < numDevices; ++i) { images[i] = 0; lengths[i] = 0; } // Read the OpenCL kernel in from source file const char* cSourceFile = "VectorAddKernels.cl"; printf("loadProgSource (%s)...\n", cSourceFile); const char* cPathAndName = cSourceFile; #ifdef LOAD_FROM_FILE size_t szKernelLength; const char* cSourceCL = loadProgSource(cPathAndName, "", &szKernelLength); #else const char* cSourceCL = stringifiedSourceCL; size_t szKernelLength = strlen(stringifiedSourceCL); #endif //LOAD_FROM_FILE // Create the program cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&cSourceCL, &szKernelLength, &ciErr1); printf("clCreateProgramWithSource...\n"); if (ciErr1 != CL_SUCCESS) { printf("Error in clCreateProgramWithSource, Line %u in file %s !!!\n\n", __LINE__, __FILE__); exit(0); } // Build the program with 'mad' Optimization option #ifdef MAC char* flags = "-cl-mad-enable -DMAC -DGUID_ARG"; #else const char* flags = "-DGUID_ARG="; #endif ciErr1 = clBuildProgram(cpProgram, 0, NULL, flags, NULL, NULL); printf("clBuildProgram...\n"); if (ciErr1 != CL_SUCCESS) { printf("Error in clBuildProgram, Line %u in file %s !!!\n\n", __LINE__, __FILE__); exit(0); } // Create the kernel ckKernel = clCreateKernel(cpProgram, "VectorAdd", &ciErr1); printf("clCreateKernel (VectorAdd)...\n"); if (ciErr1 != CL_SUCCESS) { printf("Error in clCreateKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__); exit(0); } cl_int ciErrNum; ciErrNum = clGetKernelWorkGroupInfo(ckKernel, cdDevices[0], CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wgSize, NULL); if (ciErrNum != CL_SUCCESS) { printf("cannot get workgroup size\n"); exit(0); } // Set the Argument values ciErr1 |= clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void*)&cmMemObjs[0]); ciErr1 |= clSetKernelArg(ckKernel, 1, sizeof(cl_mem), (void*)&cmMemObjs[1]); ciErr1 |= clSetKernelArg(ckKernel, 2, sizeof(cl_mem), (void*)&cmMemObjs[2]); int workgroupSize = wgSize; if(workgroupSize <= 0) { // let OpenCL library calculate workgroup size size_t globalWorkSize[2]; globalWorkSize[0] = actualGlobalSize; globalWorkSize[1] = 1; // Copy input data from host to GPU and launch kernel ciErr1 |= clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 1, NULL, globalWorkSize, NULL, 0,0,0 ); } else { size_t localWorkSize[2], globalWorkSize[2]; workgroupSize = btMin(workgroupSize, actualGlobalSize); int num_t = actualGlobalSize / workgroupSize; int num_g = num_t * workgroupSize; if(num_g < actualGlobalSize) { num_t++; //this can cause problems -> processing outside of the buffer //make sure to check kernel } size_t globalThreads[] = {num_t * workgroupSize}; size_t localThreads[] = {workgroupSize}; localWorkSize[0] = workgroupSize; globalWorkSize[0] = num_t * workgroupSize; localWorkSize[1] = 1; globalWorkSize[1] = 1; // Copy input data from host to GPU and launch kernel ciErr1 |= clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 1, NULL, globalThreads, localThreads, 0, NULL, NULL); } if (ciErrNum != CL_SUCCESS) { printf("cannot clEnqueueNDRangeKernel\n"); exit(0); } clFinish(cqCommandQue); // Read back results and check accumulated errors ciErr1 |= clEnqueueReadBuffer(cqCommandQue, cmMemObjs[2], CL_TRUE, 0, sizeof(cl_float8) * szGlobalWorkSize[0], dst, 0, NULL, NULL); // Release kernel, program, and memory objects // NOTE: Most properly this should be done at any of the exit points above, but it is omitted elsewhere for clarity. free(cdDevices); clReleaseKernel(ckKernel); clReleaseProgram(cpProgram); clReleaseCommandQueue(cqCommandQue); clReleaseContext(cxGPUContext); // print the results int iErrorCount = 0; for (i = 0; i < iTestN; i++) { if (((float*)dst)[i] != ((float*)srcA)[i]+((float*)srcB)[i]) iErrorCount++; } if (iErrorCount) { printf("MiniCL validation FAILED\n"); } else { printf("MiniCL validation SUCCESSFULL\n"); } // Free host memory, close log and return success for (i = 0; i < 3; i++) { clReleaseMemObject(cmMemObjs[i]); } free(srcA); free(srcB); free (dst); printf("Press ENTER to quit\n"); getchar(); }
int PGR_radiosity::prepareCL() { cl_int ciErr = CL_SUCCESS; // Get Platform cl_platform_id *cpPlatforms; cl_uint cuiPlatformsCount; ciErr = clGetPlatformIDs(0, NULL, &cuiPlatformsCount); this->CheckOpenCLError(ciErr, "clGetPlatformIDs: cuiPlatformsNum=%i", cuiPlatformsCount); cpPlatforms = (cl_platform_id*) malloc(cuiPlatformsCount * sizeof (cl_platform_id)); ciErr = clGetPlatformIDs(cuiPlatformsCount, cpPlatforms, NULL); this->CheckOpenCLError(ciErr, "clGetPlatformIDs"); cl_platform_id platform = 0; const unsigned int TMP_BUFFER_SIZE = 1024; char sTmp[TMP_BUFFER_SIZE]; for (unsigned int f0 = 0; f0 < cuiPlatformsCount; f0++) { //bool shouldBrake = false; ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_PROFILE, TMP_BUFFER_SIZE, sTmp, NULL); this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_PROFILE=%s", f0, sTmp); ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_VERSION, TMP_BUFFER_SIZE, sTmp, NULL); this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_VERSION=%s", f0, sTmp); ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_NAME, TMP_BUFFER_SIZE, sTmp, NULL); this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_NAME=%s", f0, sTmp); ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_VENDOR, TMP_BUFFER_SIZE, sTmp, NULL); this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_VENDOR=%s", f0, sTmp); //prioritize AMD and CUDA platforms if ((strcmp(sTmp, "NVIDIA Corporation") == 0)) { platform = cpPlatforms[f0]; } // if ((strcmp(sTmp, "Advanced Micro Devices, Inc.") == 0)) // { // platform = cpPlatforms[f0]; // } //prioritize Intel /*if ((strcmp(sTmp, "Intel(R) Corporation") == 0)) { platform = cpPlatforms[f0]; }*/ ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_EXTENSIONS, TMP_BUFFER_SIZE, sTmp, NULL); this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_EXTENSIONS=%s", f0, sTmp); } if (platform == 0) { //no prioritized found if (cuiPlatformsCount > 0) { platform = cpPlatforms[0]; } else { cerr << "No device was found" << endl; return -1; } } // Get Devices cl_uint cuiDevicesCount; ciErr = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &cuiDevicesCount); CheckOpenCLError(ciErr, "clGetDeviceIDs: cuiDevicesCount=%i", cuiDevicesCount); cl_device_id *cdDevices = (cl_device_id*) malloc(cuiDevicesCount * sizeof (cl_device_id)); ciErr = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, cuiDevicesCount, cdDevices, NULL); CheckOpenCLError(ciErr, "clGetDeviceIDs"); unsigned int deviceIndex = 0; for (unsigned int f0 = 0; f0 < cuiDevicesCount; f0++) { cl_device_type cdtTmp; size_t iDim[3]; ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_TYPE, sizeof (cdtTmp), &cdtTmp, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_TYPE=%s%s%s%s", f0, cdtTmp & CL_DEVICE_TYPE_CPU ? "CPU," : "", cdtTmp & CL_DEVICE_TYPE_GPU ? "GPU," : "", cdtTmp & CL_DEVICE_TYPE_ACCELERATOR ? "ACCELERATOR," : "", cdtTmp & CL_DEVICE_TYPE_DEFAULT ? "DEFAULT," : ""); if (cdtTmp & CL_DEVICE_TYPE_GPU) { //prioritize gpu if both cpu and gpu are available deviceIndex = f0; } cl_bool bTmp; ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_AVAILABLE, sizeof (bTmp), &bTmp, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_AVAILABLE=%s", f0, bTmp ? "YES" : "NO"); ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_NAME, TMP_BUFFER_SIZE, sTmp, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_NAME=%s", f0, sTmp); ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_VENDOR, TMP_BUFFER_SIZE, sTmp, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_VENDOR=%s", f0, sTmp); ciErr = clGetDeviceInfo(cdDevices[f0], CL_DRIVER_VERSION, TMP_BUFFER_SIZE, sTmp, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DRIVER_VERSION=%s", f0, sTmp); ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_PROFILE, TMP_BUFFER_SIZE, sTmp, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_PROFILE=%s", f0, sTmp); ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_VERSION, TMP_BUFFER_SIZE, sTmp, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_VERSION=%s", f0, sTmp); ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof (iDim), iDim, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_MAX_WORK_ITEM_SIZES=%ix%ix%i", f0, iDim[0], iDim[1], iDim[2]); ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof (size_t), iDim, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_MAX_WORK_GROUP_SIZE=%i", f0, iDim[0]); ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_EXTENSIONS, TMP_BUFFER_SIZE, sTmp, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_EXTENSIONS=%s", f0, sTmp); } cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platform, 0}; /* Create context */ this->context = clCreateContext(cps, 1, &cdDevices[deviceIndex], NULL, NULL, &ciErr); CheckOpenCLError(ciErr, "clCreateContext"); /* Create a command queue */ this->queue = clCreateCommandQueue(this->context, cdDevices[deviceIndex], CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &ciErr); CheckOpenCLError(ciErr, "clCreateCommandQueue"); /* Create and compile and openCL program */ char *cSourceCL = loadProgSource("kernels.cl"); this->program = clCreateProgramWithSource(this->context, 1, (const char **) &cSourceCL, NULL, &ciErr); CheckOpenCLError(ciErr, "clCreateProgramWithSource"); free(cSourceCL); ciErr = clBuildProgram(this->program, 0, NULL, NULL, NULL, NULL); CheckOpenCLError(ciErr, "clBuildProgram"); cl_int logStatus; char *buildLog = NULL; size_t buildLogSize = 0; logStatus = clGetProgramBuildInfo(this->program, cdDevices[deviceIndex], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, &buildLogSize); CheckOpenCLError(logStatus, "clGetProgramBuildInfo."); buildLog = (char*) malloc(buildLogSize); if (buildLog == NULL) { printf("Failed to allocate host memory. (buildLog)"); return -1; } memset(buildLog, 0, buildLogSize); logStatus = clGetProgramBuildInfo(this->program, cdDevices[deviceIndex], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL); CheckOpenCLError(logStatus, "clGetProgramBuildInfo."); free(buildLog); size_t tempKernelWorkGroupSize; /* Create kernels */ this->radiosityKernel = clCreateKernel(program, "radiosity", &ciErr); CheckOpenCLError(ciErr, "clCreateKernel radiosity"); this->sortKernel = clCreateKernel(program, "sort", &ciErr); CheckOpenCLError(ciErr, "clCreateKernel sort"); this->maxWorkGroupSize = 64; this->workGroupSize = 64; ciErr = clGetKernelWorkGroupInfo(this->radiosityKernel, cdDevices[deviceIndex], CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &tempKernelWorkGroupSize, 0); CheckOpenCLError(ciErr, "clGetKernelInfo"); this->maxWorkGroupSize = MIN(tempKernelWorkGroupSize, this->maxWorkGroupSize); if (this->workGroupSize > this->maxWorkGroupSize) { cout << "Out of Resources!" << endl; cout << "Group Size specified: " << this->workGroupSize << endl; cout << "Max Group Size supported on the kernel: " << this->maxWorkGroupSize << endl; cout << "Falling back to " << this->maxWorkGroupSize << endl; this->workGroupSize = this->maxWorkGroupSize; } /* Allocate buffer of colors */ this->patchesColorsCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->model->getPatchesCount() * sizeof (cl_uchar3), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer patchesCL"); this->raw_patchesColors = new cl_uchar3[this->model->getPatchesCount()]; this->raw_patchesEnergies = new cl_float[this->model->getPatchesCount()]; this->raw_diffColors = new cl_uchar3[this->model->getPatchesCount()]; this->raw_intensities = new cl_float[this->model->getPatchesCount()]; this->model->getPatchesCL(this->raw_patchesColors, this->raw_patchesEnergies); ciErr = clEnqueueWriteBuffer(this->queue, this->patchesColorsCL, CL_TRUE, //blocking write 0, this->model->getPatchesCount() * sizeof (cl_uchar3), this->raw_patchesColors, 0, 0, 0); CheckOpenCLError(ciErr, "Copy patches colors"); /* Alocate buffer of energies */ this->patchesEnergiesCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->model->getPatchesCount() * sizeof (cl_float), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer patchesCL"); ciErr = clEnqueueWriteBuffer(this->queue, this->patchesEnergiesCL, CL_TRUE, //blocking write 0, this->model->getPatchesCount() * sizeof (cl_float), this->raw_patchesEnergies, 0, 0, 0); CheckOpenCLError(ciErr, "Copy patches"); /* Allocate buffer of patches geometry */ this->patchesGeoCL = clCreateBuffer(this->context, CL_MEM_READ_ONLY, this->model->getPatchesCount() * sizeof (cl_float8), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer patchesGeometryCL"); this->raw_patchesGeo = new cl_float8[this->model->getPatchesCount()]; this->model->getPatchesGeometryCL(raw_patchesGeo); ciErr = clEnqueueWriteBuffer(this->queue, this->patchesGeoCL, CL_TRUE, //blocking write 0, this->model->getPatchesCount() * sizeof (cl_float8), this->raw_patchesGeo, 0, 0, 0); CheckOpenCLError(ciErr, "Copy patches geometry"); this->indicesCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->maxWorkGroupSize * sizeof (cl_uint), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer indicesCL"); this->indicesCountCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, sizeof (cl_uint), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer indicesCountCL"); this->maximalEnergyCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, sizeof (cl_float), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer maximalEnergyCL"); this->diffColorsCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->model->getPatchesCount() * sizeof (cl_uchar3), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer diffColorsCL"); cl_uchar3* zeros = new cl_uchar3[this->model->getPatchesCount()]; memset(zeros, 0, this->model->getPatchesCount() * sizeof (cl_uchar3)); ciErr = clEnqueueWriteBuffer(this->queue, this->diffColorsCL, CL_TRUE, //blocking write 0, this->model->getPatchesCount() * sizeof (cl_uchar3), zeros, 0, 0, 0); CheckOpenCLError(ciErr, "Clear diff colors"); delete [] zeros; this->intensitiesCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->model->getPatchesCount() * sizeof (cl_float), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer intensitiesCL"); cl_float* zeroIntensity = new cl_float[this->model->getPatchesCount()]; memset(zeroIntensity, 0, this->model->getPatchesCount() * sizeof (cl_float)); ciErr = clEnqueueWriteBuffer(this->queue, this->intensitiesCL, CL_TRUE, //blocking write 0, this->model->getPatchesCount() * sizeof (cl_float), zeroIntensity, 0, 0, 0); CheckOpenCLError(ciErr, "Clear intensities"); delete [] zeroIntensity; this->texturesCL = clCreateBuffer(this->context, CL_MEM_READ_ONLY, this->maxWorkGroupSize * 768 * 256 * sizeof (cl_uchar3), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer texturesCL"); this->visitedCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->maxWorkGroupSize * this->model->getPatchesCount() * sizeof (cl_bool), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer visitedCL"); cl_bool* zeroVisited = new cl_bool[this->maxWorkGroupSize * this->model->getPatchesCount()]; memset(zeroVisited, 0, this->maxWorkGroupSize * this->model->getPatchesCount() * sizeof (cl_bool)); ciErr = clEnqueueWriteBuffer(this->queue, this->visitedCL, CL_TRUE, //blocking write 0, this->model->getPatchesCount() * sizeof (cl_bool), zeroVisited, 0, 0, 0); CheckOpenCLError(ciErr, "Clear visited flags"); delete [] zeroVisited; free(cdDevices); return 0; }