Esempio n. 1
0
// Main function 
// *********************************************************************
int main(int argc, char **argv)
{
	void *srcA, *srcB, *dst;        // Host buffers for OpenCL test
    cl_context cxGPUContext;       // OpenCL context
    cl_command_queue cqCommandQue;  // OpenCL command que
    cl_device_id* cdDevices;        // OpenCL device list    
    cl_program cpProgram;           // OpenCL program
    cl_kernel ckKernel;             // OpenCL kernel
    cl_mem cmMemObjs[3];            // OpenCL memory buffer objects:  3 for device
    size_t szGlobalWorkSize[1];     // 1D var for Total # of work items
    size_t szLocalWorkSize[1];		// 1D var for # of work items in the work group	
    size_t szParmDataBytes;			// Byte size of context information
    cl_int ciErr1, ciErr2;			// Error code var
    int iTestN = 100000 * 8;		// Size of Vectors to process

	int actualGlobalSize = iTestN>>3;
	
    // set Global and Local work size dimensions
    szGlobalWorkSize[0] = iTestN >> 3;  // do 8 computations per work item
    szLocalWorkSize[0]= iTestN>>3;
	
	
    // Allocate and initialize host arrays
    srcA = (void *)malloc (sizeof(cl_float) * iTestN);
    srcB = (void *)malloc (sizeof(cl_float) * iTestN);
    dst = (void *)malloc (sizeof(cl_float) * iTestN);

	int i;

	// Initialize arrays with some values
	for (i=0;i<iTestN;i++)
	{
		((cl_float*)srcA)[i] = cl_float(i);
		((cl_float*)srcB)[i] = 2;
		((cl_float*)dst)[i]=-1;
	}


	 cl_uint numPlatforms;
    cl_platform_id platform = NULL;
    cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);

    if (0 < numPlatforms) 
    {
        cl_platform_id* platforms = new cl_platform_id[numPlatforms];
        status = clGetPlatformIDs(numPlatforms, platforms, NULL);
        
        for (unsigned i = 0; i < numPlatforms; ++i) 
        {
            char pbuf[100];
            status = clGetPlatformInfo(platforms[i],
                                       CL_PLATFORM_VENDOR,
                                       sizeof(pbuf),
                                       pbuf,
                                       NULL);

            platform = platforms[i];
            if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) 
            {
                break;
            }
        }
        delete[] platforms;
    }

	cl_context_properties cps[3] = 
    {
        CL_CONTEXT_PLATFORM, 
        (cl_context_properties)platform, 
        0
    };

    // Create OpenCL context & context
    cxGPUContext = clCreateContextFromType(cps, CL_DEVICE_TYPE_ALL, NULL, NULL, &ciErr1); //could also be CL_DEVICE_TYPE_GPU
	
    // Query all devices available to the context
    ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
    cdDevices = (cl_device_id*)malloc(szParmDataBytes);
    ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
	if (cdDevices)
	{
		printDevInfo(cdDevices[0]);
	}

    // Create a command queue for first device the context reported
    cqCommandQue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, &ciErr2);
    ciErr1 |= ciErr2; 

    // Allocate the OpenCL source and result buffer memory objects on the device GMEM
    cmMemObjs[0] = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float8) * szGlobalWorkSize[0], srcA, &ciErr2);
    ciErr1 |= ciErr2;
    cmMemObjs[1] = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float8) * szGlobalWorkSize[0], srcB, &ciErr2);
    ciErr1 |= ciErr2;
    cmMemObjs[2] = clCreateBuffer(cxGPUContext, CL_MEM_WRITE_ONLY, sizeof(cl_float8) * szGlobalWorkSize[0], NULL, &ciErr2);
    ciErr1 |= ciErr2;

///create kernels from binary
	int numDevices = 1;
	::size_t* lengths = (::size_t*) malloc(numDevices * sizeof(::size_t));
	const unsigned char** images = (const unsigned char**) malloc(numDevices * sizeof(const void*));

	for (i = 0; i < numDevices; ++i) {
		images[i] = 0;
		lengths[i] = 0;
	}

	
	// Read the OpenCL kernel in from source file
	const char* cSourceFile = "VectorAddKernels.cl";
	
    printf("loadProgSource (%s)...\n", cSourceFile); 
    const char* cPathAndName = cSourceFile;
#ifdef LOAD_FROM_FILE
	size_t szKernelLength;
    const char* cSourceCL = loadProgSource(cPathAndName, "", &szKernelLength);
#else
	const char* cSourceCL = stringifiedSourceCL;
	size_t szKernelLength = strlen(stringifiedSourceCL);
#endif //LOAD_FROM_FILE


	
    // Create the program
    cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&cSourceCL, &szKernelLength, &ciErr1);
    printf("clCreateProgramWithSource...\n"); 
    if (ciErr1 != CL_SUCCESS)
    {
        printf("Error in clCreateProgramWithSource, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
        exit(0);
    }
	
    // Build the program with 'mad' Optimization option
#ifdef MAC
	char* flags = "-cl-mad-enable -DMAC -DGUID_ARG";
#else
	const char* flags = "-DGUID_ARG=";
#endif
    ciErr1 = clBuildProgram(cpProgram, 0, NULL, flags, NULL, NULL);
    printf("clBuildProgram...\n"); 
    if (ciErr1 != CL_SUCCESS)
    {
        printf("Error in clBuildProgram, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
        exit(0);
    }
	
    // Create the kernel
    ckKernel = clCreateKernel(cpProgram, "VectorAdd", &ciErr1);
    printf("clCreateKernel (VectorAdd)...\n"); 
    if (ciErr1 != CL_SUCCESS)
    {
        printf("Error in clCreateKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
		exit(0);
    }
	
	
	cl_int ciErrNum;
	
	ciErrNum = clGetKernelWorkGroupInfo(ckKernel, cdDevices[0], CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wgSize, NULL);
	if (ciErrNum != CL_SUCCESS)
	{
		printf("cannot get workgroup size\n");
		exit(0);
	}

	

   
    // Set the Argument values
    ciErr1 |= clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void*)&cmMemObjs[0]);
    ciErr1 |= clSetKernelArg(ckKernel, 1, sizeof(cl_mem), (void*)&cmMemObjs[1]);
    ciErr1 |= clSetKernelArg(ckKernel, 2, sizeof(cl_mem), (void*)&cmMemObjs[2]);

	
	
	int workgroupSize = wgSize;
	if(workgroupSize <= 0)
	{ // let OpenCL library calculate workgroup size
		size_t globalWorkSize[2];
		globalWorkSize[0] = actualGlobalSize;
		globalWorkSize[1] = 1;
	
		// Copy input data from host to GPU and launch kernel 
		ciErr1 |= clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 1, NULL, globalWorkSize, NULL, 0,0,0 );

	}
	else
	{
		size_t localWorkSize[2], globalWorkSize[2];
		workgroupSize = btMin(workgroupSize, actualGlobalSize);
		int num_t = actualGlobalSize / workgroupSize;
		int num_g = num_t * workgroupSize;
		if(num_g < actualGlobalSize)
		{
			num_t++;
			//this can cause problems -> processing outside of the buffer
			//make sure to check kernel
		}

		size_t globalThreads[] = {num_t * workgroupSize};
		size_t localThreads[] = {workgroupSize};


		localWorkSize[0]  = workgroupSize;
		globalWorkSize[0] = num_t * workgroupSize;
		localWorkSize[1] = 1;
		globalWorkSize[1] = 1;

		// Copy input data from host to GPU and launch kernel 
		ciErr1 |= clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 1, NULL, globalThreads, localThreads, 0, NULL, NULL);

	}
	
	if (ciErrNum != CL_SUCCESS)
	{
		printf("cannot clEnqueueNDRangeKernel\n");
		exit(0);
	}
	
	clFinish(cqCommandQue);
    // Read back results and check accumulated errors
    ciErr1 |= clEnqueueReadBuffer(cqCommandQue, cmMemObjs[2], CL_TRUE, 0, sizeof(cl_float8) * szGlobalWorkSize[0], dst, 0, NULL, NULL);

    // Release kernel, program, and memory objects
	// NOTE:  Most properly this should be done at any of the exit points above, but it is omitted elsewhere for clarity.
    free(cdDevices);
	clReleaseKernel(ckKernel);  
    clReleaseProgram(cpProgram);
    clReleaseCommandQueue(cqCommandQue);
    clReleaseContext(cxGPUContext);


    // print the results
    int iErrorCount = 0;
    for (i = 0; i < iTestN; i++) 
    {
		if (((float*)dst)[i] != ((float*)srcA)[i]+((float*)srcB)[i])
			iErrorCount++;
    }
	
	if (iErrorCount)
	{
		printf("MiniCL validation FAILED\n");
	} else
	{
		printf("MiniCL validation SUCCESSFULL\n");
	}
    // Free host memory, close log and return success
	for (i = 0; i < 3; i++)
    {
        clReleaseMemObject(cmMemObjs[i]);
    }

    free(srcA); 
    free(srcB);
    free (dst);
	printf("Press ENTER to quit\n");
	getchar();
}
int PGR_radiosity::prepareCL()
{
    cl_int ciErr = CL_SUCCESS;

    // Get Platform
    cl_platform_id *cpPlatforms;
    cl_uint cuiPlatformsCount;
    ciErr = clGetPlatformIDs(0, NULL, &cuiPlatformsCount);
    this->CheckOpenCLError(ciErr, "clGetPlatformIDs: cuiPlatformsNum=%i", cuiPlatformsCount);
    cpPlatforms = (cl_platform_id*) malloc(cuiPlatformsCount * sizeof (cl_platform_id));
    ciErr = clGetPlatformIDs(cuiPlatformsCount, cpPlatforms, NULL);
    this->CheckOpenCLError(ciErr, "clGetPlatformIDs");

    cl_platform_id platform = 0;

    const unsigned int TMP_BUFFER_SIZE = 1024;
    char sTmp[TMP_BUFFER_SIZE];

    for (unsigned int f0 = 0; f0 < cuiPlatformsCount; f0++)
    {
        //bool shouldBrake = false;
        ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_PROFILE, TMP_BUFFER_SIZE, sTmp, NULL);
        this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_PROFILE=%s", f0, sTmp);
        ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_VERSION, TMP_BUFFER_SIZE, sTmp, NULL);
        this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_VERSION=%s", f0, sTmp);
        ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_NAME, TMP_BUFFER_SIZE, sTmp, NULL);
        this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_NAME=%s", f0, sTmp);
        ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_VENDOR, TMP_BUFFER_SIZE, sTmp, NULL);
        this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_VENDOR=%s", f0, sTmp);

        //prioritize AMD and CUDA platforms

        if ((strcmp(sTmp, "NVIDIA Corporation") == 0))
        {
            platform = cpPlatforms[f0];
        }

        //        if ((strcmp(sTmp, "Advanced Micro Devices, Inc.") == 0))
        //        {
        //            platform = cpPlatforms[f0];
        //        }

        //prioritize Intel
        /*if ((strcmp(sTmp, "Intel(R) Corporation") == 0)) {
            platform = cpPlatforms[f0];
        }*/

        ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_EXTENSIONS, TMP_BUFFER_SIZE, sTmp, NULL);
        this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_EXTENSIONS=%s", f0, sTmp);
    }

    if (platform == 0)
    { //no prioritized found
        if (cuiPlatformsCount > 0)
        {
            platform = cpPlatforms[0];
        }
        else
        {
            cerr << "No device was found" << endl;
            return -1;
        }
    }
    // Get Devices
    cl_uint cuiDevicesCount;
    ciErr = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &cuiDevicesCount);
    CheckOpenCLError(ciErr, "clGetDeviceIDs: cuiDevicesCount=%i", cuiDevicesCount);
    cl_device_id *cdDevices = (cl_device_id*) malloc(cuiDevicesCount * sizeof (cl_device_id));
    ciErr = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, cuiDevicesCount, cdDevices, NULL);
    CheckOpenCLError(ciErr, "clGetDeviceIDs");

    unsigned int deviceIndex = 0;

    for (unsigned int f0 = 0; f0 < cuiDevicesCount; f0++)
    {
        cl_device_type cdtTmp;
        size_t iDim[3];

        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_TYPE, sizeof (cdtTmp), &cdtTmp, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_TYPE=%s%s%s%s", f0, cdtTmp & CL_DEVICE_TYPE_CPU ? "CPU," : "",
                         cdtTmp & CL_DEVICE_TYPE_GPU ? "GPU," : "",
                         cdtTmp & CL_DEVICE_TYPE_ACCELERATOR ? "ACCELERATOR," : "",
                         cdtTmp & CL_DEVICE_TYPE_DEFAULT ? "DEFAULT," : "");

        if (cdtTmp & CL_DEVICE_TYPE_GPU)
        { //prioritize gpu if both cpu and gpu are available
            deviceIndex = f0;
        }

        cl_bool bTmp;
        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_AVAILABLE, sizeof (bTmp), &bTmp, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_AVAILABLE=%s", f0, bTmp ? "YES" : "NO");
        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_NAME, TMP_BUFFER_SIZE, sTmp, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_NAME=%s", f0, sTmp);
        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_VENDOR, TMP_BUFFER_SIZE, sTmp, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_VENDOR=%s", f0, sTmp);
        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DRIVER_VERSION, TMP_BUFFER_SIZE, sTmp, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DRIVER_VERSION=%s", f0, sTmp);
        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_PROFILE, TMP_BUFFER_SIZE, sTmp, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_PROFILE=%s", f0, sTmp);
        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_VERSION, TMP_BUFFER_SIZE, sTmp, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_VERSION=%s", f0, sTmp);
        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof (iDim), iDim, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_MAX_WORK_ITEM_SIZES=%ix%ix%i", f0, iDim[0], iDim[1], iDim[2]);
        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof (size_t), iDim, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_MAX_WORK_GROUP_SIZE=%i", f0, iDim[0]);
        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_EXTENSIONS, TMP_BUFFER_SIZE, sTmp, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_EXTENSIONS=%s", f0, sTmp);
    }

    cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platform, 0};



    /* Create context */
    this->context = clCreateContext(cps, 1, &cdDevices[deviceIndex], NULL, NULL, &ciErr);
    CheckOpenCLError(ciErr, "clCreateContext");

    /* Create a command queue */
    this->queue = clCreateCommandQueue(this->context, cdDevices[deviceIndex], CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &ciErr);
    CheckOpenCLError(ciErr, "clCreateCommandQueue");

    /* Create and compile and openCL program */
    char *cSourceCL = loadProgSource("kernels.cl");

    this->program = clCreateProgramWithSource(this->context, 1, (const char **) &cSourceCL, NULL, &ciErr);
    CheckOpenCLError(ciErr, "clCreateProgramWithSource");
    free(cSourceCL);

    ciErr = clBuildProgram(this->program, 0, NULL, NULL, NULL, NULL);
    CheckOpenCLError(ciErr, "clBuildProgram");

    cl_int logStatus;
    char *buildLog = NULL;
    size_t buildLogSize = 0;
    logStatus = clGetProgramBuildInfo(this->program,
                                      cdDevices[deviceIndex],
                                      CL_PROGRAM_BUILD_LOG,
                                      buildLogSize,
                                      buildLog,
                                      &buildLogSize);

    CheckOpenCLError(logStatus, "clGetProgramBuildInfo.");

    buildLog = (char*) malloc(buildLogSize);
    if (buildLog == NULL)
    {
        printf("Failed to allocate host memory. (buildLog)");
        return -1;
    }
    memset(buildLog, 0, buildLogSize);

    logStatus = clGetProgramBuildInfo(this->program,
                                      cdDevices[deviceIndex],
                                      CL_PROGRAM_BUILD_LOG,
                                      buildLogSize,
                                      buildLog,
                                      NULL);
    CheckOpenCLError(logStatus, "clGetProgramBuildInfo.");
    free(buildLog);

    size_t tempKernelWorkGroupSize;

    /* Create kernels */
    this->radiosityKernel = clCreateKernel(program, "radiosity", &ciErr);
    CheckOpenCLError(ciErr, "clCreateKernel radiosity");
    this->sortKernel = clCreateKernel(program, "sort", &ciErr);
    CheckOpenCLError(ciErr, "clCreateKernel sort");

    this->maxWorkGroupSize = 64;
    this->workGroupSize = 64;

    ciErr = clGetKernelWorkGroupInfo(this->radiosityKernel,
                                     cdDevices[deviceIndex],
                                     CL_KERNEL_WORK_GROUP_SIZE,
                                     sizeof (size_t),
                                     &tempKernelWorkGroupSize,
                                     0);
    CheckOpenCLError(ciErr, "clGetKernelInfo");
    this->maxWorkGroupSize = MIN(tempKernelWorkGroupSize, this->maxWorkGroupSize);

    if (this->workGroupSize > this->maxWorkGroupSize)
    {
        cout << "Out of Resources!" << endl;
        cout << "Group Size specified: " << this->workGroupSize << endl;
        cout << "Max Group Size supported on the kernel: " << this->maxWorkGroupSize << endl;
        cout << "Falling back to " << this->maxWorkGroupSize << endl;

        this->workGroupSize = this->maxWorkGroupSize;
    }


    /* Allocate buffer of colors */
    this->patchesColorsCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->model->getPatchesCount() * sizeof (cl_uchar3), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer patchesCL");

    this->raw_patchesColors = new cl_uchar3[this->model->getPatchesCount()];
    this->raw_patchesEnergies = new cl_float[this->model->getPatchesCount()];
    this->raw_diffColors = new cl_uchar3[this->model->getPatchesCount()];
    this->raw_intensities = new cl_float[this->model->getPatchesCount()];
    this->model->getPatchesCL(this->raw_patchesColors, this->raw_patchesEnergies);

    ciErr = clEnqueueWriteBuffer(this->queue,
                                 this->patchesColorsCL,
                                 CL_TRUE, //blocking write
                                 0,
                                 this->model->getPatchesCount() * sizeof (cl_uchar3),
                                 this->raw_patchesColors,
                                 0,
                                 0,
                                 0);
    CheckOpenCLError(ciErr, "Copy patches colors");

    /* Alocate buffer of energies */
    this->patchesEnergiesCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->model->getPatchesCount() * sizeof (cl_float), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer patchesCL");

    ciErr = clEnqueueWriteBuffer(this->queue,
                                 this->patchesEnergiesCL,
                                 CL_TRUE, //blocking write
                                 0,
                                 this->model->getPatchesCount() * sizeof (cl_float),
                                 this->raw_patchesEnergies,
                                 0,
                                 0,
                                 0);
    CheckOpenCLError(ciErr, "Copy patches");

    /* Allocate buffer of patches geometry */
    this->patchesGeoCL = clCreateBuffer(this->context, CL_MEM_READ_ONLY, this->model->getPatchesCount() * sizeof (cl_float8), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer patchesGeometryCL");

    this->raw_patchesGeo = new cl_float8[this->model->getPatchesCount()];
    this->model->getPatchesGeometryCL(raw_patchesGeo);
    ciErr = clEnqueueWriteBuffer(this->queue,
                                 this->patchesGeoCL,
                                 CL_TRUE, //blocking write
                                 0,
                                 this->model->getPatchesCount() * sizeof (cl_float8),
                                 this->raw_patchesGeo,
                                 0,
                                 0,
                                 0);
    CheckOpenCLError(ciErr, "Copy patches geometry");


    this->indicesCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->maxWorkGroupSize * sizeof (cl_uint), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer indicesCL");

    this->indicesCountCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, sizeof (cl_uint), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer indicesCountCL");

    this->maximalEnergyCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, sizeof (cl_float), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer maximalEnergyCL");

    this->diffColorsCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->model->getPatchesCount() * sizeof (cl_uchar3), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer diffColorsCL");

    cl_uchar3* zeros = new cl_uchar3[this->model->getPatchesCount()];
    memset(zeros, 0, this->model->getPatchesCount() * sizeof (cl_uchar3));
    ciErr = clEnqueueWriteBuffer(this->queue,
                                 this->diffColorsCL,
                                 CL_TRUE, //blocking write
                                 0,
                                 this->model->getPatchesCount() * sizeof (cl_uchar3),
                                 zeros,
                                 0,
                                 0,
                                 0);
    CheckOpenCLError(ciErr, "Clear diff colors");
    delete [] zeros;

    this->intensitiesCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->model->getPatchesCount() * sizeof (cl_float), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer intensitiesCL");

    cl_float* zeroIntensity = new cl_float[this->model->getPatchesCount()];
    memset(zeroIntensity, 0, this->model->getPatchesCount() * sizeof (cl_float));
    ciErr = clEnqueueWriteBuffer(this->queue,
                                 this->intensitiesCL,
                                 CL_TRUE, //blocking write
                                 0,
                                 this->model->getPatchesCount() * sizeof (cl_float),
                                 zeroIntensity,
                                 0,
                                 0,
                                 0);
    CheckOpenCLError(ciErr, "Clear intensities");
    delete [] zeroIntensity;

    this->texturesCL = clCreateBuffer(this->context, CL_MEM_READ_ONLY, this->maxWorkGroupSize * 768 * 256 * sizeof (cl_uchar3), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer texturesCL");

    this->visitedCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->maxWorkGroupSize * this->model->getPatchesCount() * sizeof (cl_bool), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer visitedCL");

    cl_bool* zeroVisited = new cl_bool[this->maxWorkGroupSize * this->model->getPatchesCount()];
    memset(zeroVisited, 0, this->maxWorkGroupSize * this->model->getPatchesCount() * sizeof (cl_bool));
    ciErr = clEnqueueWriteBuffer(this->queue,
                                 this->visitedCL,
                                 CL_TRUE, //blocking write
                                 0,
                                 this->model->getPatchesCount() * sizeof (cl_bool),
                                 zeroVisited,
                                 0,
                                 0,
                                 0);
    CheckOpenCLError(ciErr, "Clear visited flags");
    delete [] zeroVisited;

    free(cdDevices);

    return 0;
}