Пример #1
0
void PGR_radiosity::computeRadiosityCL()
{
    /* Prepare CL structures */
    if (this->prepareCL() != 0)
    {
        this->releaseCL();
        return;
    }

    /* Run OpenCL kernel that computes radiosity. It includes a loop */
    this->runRadiosityKernelCL();

    clFinish(this->queue);

    int status = clEnqueueReadBuffer(this->queue,
                                     this->diffColorsCL,
                                     CL_TRUE, //blocking read
                                     0,
                                     this->model->getPatchesCount() * sizeof (cl_uchar3),
                                     this->raw_diffColors,
                                     0,
                                     0,
                                     0);
    CheckOpenCLError(status, "Read diffColors.");

    status = clEnqueueReadBuffer(this->queue,
                                 this->intensitiesCL,
                                 CL_TRUE, //blocking write
                                 0,
                                 this->model->getPatchesCount() * sizeof (cl_float),
                                 this->raw_intensities,
                                 0,
                                 0,
                                 0);
    CheckOpenCLError(status, "Read intensities.");

    /* Decode opencl memory objects */
    this->model->decodeData(this->raw_diffColors, this->raw_intensities, this->model->getPatchesCount());

    this->model->recomputeColors();

    this->model->updateArrays();

    /* Release CL structures */
    this->releaseCL();
}
Пример #2
0
// OpenCL functions
int InitialiseCLEnvironment(cl_platform_id **platform, cl_device_id ***device_id, cl_program *program, renderStruct *render)
{
	// error flag
	cl_int err;
	char infostring[1024];
	char deviceInfo[1024];

	// need to ensure platform supports OpenGL OpenCL interop before querying devices
	// to avoid segfault when calling clGetGLContextInfoKHR
	int *platformSupportsInterop;

	//get kernel from file
	FILE* kernelFile = fopen(kernelFileName, "rb");
	fseek(kernelFile, 0, SEEK_END);
	long fileLength = ftell(kernelFile);
	rewind(kernelFile);
	char *kernelSource = malloc(fileLength*sizeof(char));
	long read = fread(kernelSource, sizeof(char), fileLength, kernelFile);
	if (fileLength != read) printf("Error reading kernel file, line %d\n", __LINE__);
	fclose(kernelFile);

	//get platform and device information
	cl_uint numPlatforms;
	err = clGetPlatformIDs(0, NULL, &numPlatforms);
	*platform = malloc(numPlatforms * sizeof(cl_platform_id));
	*device_id = malloc(numPlatforms * sizeof(cl_device_id*));
	platformSupportsInterop = malloc(numPlatforms * sizeof(*platformSupportsInterop));
	err |= clGetPlatformIDs(numPlatforms, *platform, NULL);
	CheckOpenCLError(err, __LINE__);
	cl_uint *numDevices;
	numDevices = malloc(numPlatforms * sizeof(cl_uint));

	for (cl_uint i = 0; i < numPlatforms; i++) {
		clGetPlatformInfo((*platform)[i], CL_PLATFORM_VENDOR, sizeof(infostring), infostring, NULL);
		printf("\n---OpenCL: Platform Vendor %d: %s\n", i, infostring);

		err = clGetDeviceIDs((*platform)[i], CL_DEVICE_TYPE_ALL, 0, NULL, &(numDevices[i]));
		CheckOpenCLError(err, __LINE__);
		(*device_id)[i] = malloc(numDevices[i] * sizeof(cl_device_id));
		platformSupportsInterop[i] = 0;
		err = clGetDeviceIDs((*platform)[i], CL_DEVICE_TYPE_ALL, numDevices[i], (*device_id)[i], NULL);
		CheckOpenCLError(err, __LINE__);
		for (cl_uint j = 0; j < numDevices[i]; j++) {
			char deviceName[200];
			clGetDeviceInfo((*device_id)[i][j], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
			printf("---OpenCL:    Device found %d. %s\n", j, deviceName);
			clGetDeviceInfo((*device_id)[i][j], CL_DEVICE_EXTENSIONS, sizeof(deviceInfo), deviceInfo, NULL);
			if (strstr(deviceInfo, "cl_khr_gl_sharing") != NULL) {
				printf("---OpenCL:        cl_khr_gl_sharing supported!\n");
				platformSupportsInterop[i] = 1;
			}
			else {
				printf("---OpenCL:        cl_khr_gl_sharing NOT supported!\n");
				platformSupportsInterop[i] |= 0;
			}
			if (strstr(deviceInfo, "cl_khr_fp64") != NULL) {
				printf("---OpenCL:        cl_khr_fp64 supported!\n");
			}
			else {
				printf("---OpenCL:        cl_khr_fp64 NOT supported!\n");
			}
		}
	}
	printf("\n");


	////////////////////////////////
	// This part is different to how we usually do things. Need to get context and device from existing
	// OpenGL context. Loop through all platforms looking for the device:
	cl_device_id device = NULL;
	int deviceFound = 0;
	cl_uint checkPlatform = 0;

#ifdef TRYINTEROP
	while (!deviceFound) {
		if (platformSupportsInterop[checkPlatform]) {
			printf("---OpenCL: Looking for OpenGL Context device on platform %d ... ", checkPlatform);
			clGetGLContextInfoKHR_fn pclGetGLContextInfoKHR;
			PTR_FUNC_PTR pclGetGLContextInfoKHR = clGetExtensionFunctionAddressForPlatform((*platform)[checkPlatform], "clGetGLContextInfoKHR");
			cl_context_properties properties[] = {
				CL_GL_CONTEXT_KHR, (cl_context_properties) glfwGetGLXContext(render->window),
				CL_GLX_DISPLAY_KHR, (cl_context_properties) glfwGetX11Display(),
				CL_CONTEXT_PLATFORM, (cl_context_properties) (*platform)[checkPlatform],
				0};
			err = pclGetGLContextInfoKHR(properties, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, sizeof(cl_device_id), &device, NULL);
			if (err != CL_SUCCESS) {
				printf("Not Found.\n");
				checkPlatform++;
				if (checkPlatform > numPlatforms-1) {
					printf("---OpenCL: Error! Could not find OpenGL sharing device.\n");
					deviceFound = 1;
					render->glclInterop = 0;
				}
			}
			else {
				printf("Found!\n");
				deviceFound = 1;
				render->glclInterop = 1;
			}
		}
		else {
			checkPlatform++;
		}
	}

	if (render->glclInterop) {
		// Check the device we've found supports double precision
		clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, sizeof(deviceInfo), deviceInfo, NULL);
		if (strstr(deviceInfo, "cl_khr_fp64") == NULL) {
			printf("---OpenCL: Interop device doesn't support double precision! We cannot use it.\n");
		}
		else {
			cl_context_properties properties[] = {
				CL_GL_CONTEXT_KHR, (cl_context_properties) glfwGetGLXContext(render->window),
				CL_GLX_DISPLAY_KHR, (cl_context_properties) glfwGetX11Display(),
				CL_CONTEXT_PLATFORM, (cl_context_properties) (*platform)[checkPlatform],
				0};
			render->contextCL = clCreateContext(properties, 1, &device, NULL, 0, &err);
			CheckOpenCLError(err, __LINE__);
		}
	}
#endif

	// if render->glclInterop is 0, either we are not trying to use it, we couldn't find an interop
	// device, or we found an interop device but it doesn't support double precision.
	// In these cases, have the user choose a platform and device manually.
	if (!(render->glclInterop)) {
		printf("Choose a platform and device.\n");
		checkPlatform = numPlatforms;
		while (checkPlatform >= numPlatforms) {
			printf("Platform: ");
			scanf("%u", &checkPlatform);
			if (checkPlatform >= numPlatforms) {
				printf("Invalid Platform choice.\n");
			}
		}

		cl_uint chooseDevice = numDevices[checkPlatform];
		while (chooseDevice >= numDevices[checkPlatform]) {
			printf("Device: ");
			scanf("%u", &chooseDevice);
			if (chooseDevice >= numDevices[checkPlatform]) {
				printf("Invalid Device choice.\n");
			} else {
				// Check the device we've chosen supports double precision
				clGetDeviceInfo((*device_id)[checkPlatform][chooseDevice], CL_DEVICE_EXTENSIONS, sizeof(deviceInfo), deviceInfo, NULL);
				if (strstr(deviceInfo, "cl_khr_fp64") == NULL) {
					printf("---OpenCL: Interop device doesn't support double precision! We cannot use it.\n");
					chooseDevice = numDevices[checkPlatform];
				}
			}
		}

		// Create non-interop context
		render->contextCL = clCreateContext(NULL, 1, &((*device_id)[checkPlatform][chooseDevice]), NULL, NULL, &err);
		device = (*device_id)[checkPlatform][chooseDevice];
	}
	////////////////////////////////

	// device is now fixed. Query its max global memory allocation size and store it, used in
	// HighResolutionRender routine, to determine into how many tiles we need to split the
	// computation.
	clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(render->deviceMaxAlloc), &(render->deviceMaxAlloc), NULL);
	printf("---OpenCL: Selected device has CL_DEVICE_MAX_MEM_ALLOC_SIZE: %lfMB\n",
	       render->deviceMaxAlloc/1024.0/1024.0);

	// create a command queue
	render->queue = clCreateCommandQueue(render->contextCL, device, 0, &err);
	CheckOpenCLError(err, __LINE__);


	//create the program with the source above
//	printf("Creating CL Program...\n");
	*program = clCreateProgramWithSource(render->contextCL, 1, (const char**)&kernelSource, NULL, &err);
	if (err != CL_SUCCESS) {
		printf("Error in clCreateProgramWithSource: %d, line %d.\n", err, __LINE__);
		return EXIT_FAILURE;
	}

	//build program executable
	err = clBuildProgram(*program, 0, NULL, "-I. -I src/", NULL, NULL);
	if (err != CL_SUCCESS) {
		printf("Error in clBuildProgram: %d, line %d.\n", err, __LINE__);
		char buffer[5000];
		clGetProgramBuildInfo(*program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL);
		printf("%s\n", buffer);
		return EXIT_FAILURE;
	}

	// dump ptx
	size_t binSize;
	clGetProgramInfo(*program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binSize, NULL);
	unsigned char *bin = malloc(binSize);
	clGetProgramInfo(*program, CL_PROGRAM_BINARIES, sizeof(unsigned char *), &bin, NULL);
	FILE *fp = fopen("openclPTX.ptx", "wb");
	fwrite(bin, sizeof(char), binSize, fp);
	fclose(fp);
	free(bin);

	free(numDevices);
	free(kernelSource);
	printf("\n");
	return EXIT_SUCCESS;
}
Пример #3
0
int main(void)
{
	printf("\n"
	       "Controls:  - Left/Right Click to zoom in/out, centring on cursor position.\n"
	       "           - Left Click and Drag to pan.\n"
	       "           - r to reset view.\n"
	       "           - q,w to decrease, increase max iteration count\n"
	       "           - a,s to decrease, increase colour period\n"
	       "           - g to toggle Gaussian Blur after computation\n"
	       "           - b to run some benchmarks.\n"
	       "           - p to show a double-precision limited zoom.\n"
	       "           - h to save a high resolution image of the current view to current directory.\n"
	       "           - Esc to quit.\n\n");

	// Set render function, dependent on compile time flag. All have the same signature,
	// with all necessary variables defined inside the structs.
#ifdef WITHOPENCL
	RenderMandelbrotPtr RenderMandelbrot = &RenderMandelbrotOpenCL;
#elif defined(WITHAVX)
	RenderMandelbrotPtr RenderMandelbrot = &RenderMandelbrotAVXCPU;
	// AVX double prec vector width (4) must divide horizontal (x) resolution
	assert(XRESOLUTION % 4 == 0);
#elif defined(WITHGMP)
	RenderMandelbrotPtr RenderMandelbrot = &RenderMandelbrotGMPCPU;
#else
	RenderMandelbrotPtr RenderMandelbrot = &RenderMandelbrotCPU;
#endif


	// Define and initialize structs
	imageStruct image;
	renderStruct render;
	// Set image resolution
	image.xRes = XRESOLUTION;
	image.yRes = YRESOLUTION;
	// Initial values for boundaries, iteration count
	SetInitialValues(&image);
	// Update OpenGL texture on render. This is disabled when rendering high resolution images
	render.updateTex = 1;
	// Allocate host memory, used to set up OpenGL texture, even if we are using interop OpenCL
	image.pixels = malloc(image.xRes * image.yRes * sizeof *(image.pixels) *3);


	// OpenGL variables and setup
	render.window = NULL;
	GLuint vertexShader, fragmentShader, shaderProgram;
	GLuint vao, vbo, ebo, tex;
	SetUpOpenGL(&(render.window), image.xRes, image.yRes, &vertexShader, &fragmentShader, &shaderProgram, &vao, &vbo, &ebo, &tex);


#ifdef WITHOPENCL
	// OpenCL variables and setup
	cl_platform_id    *platform;
	cl_device_id      **device_id;
	cl_program        program;
	cl_int            err;
	render.globalSize = image.xRes * image.yRes;
	render.localSize = OPENCLLOCALSIZE;
	assert(render.globalSize % render.localSize == 0);

	// Initially set variable that controls interop of OpenGL and OpenCL to 0, set to 1 if
	// interop device found successfully
	render.glclInterop = 0;

	if (InitialiseCLEnvironment(&platform, &device_id, &program, &render) == EXIT_FAILURE) {
		printf("Error initialising OpenCL environment\n");
		return EXIT_FAILURE;
	}
	size_t sizeBytes = image.xRes * image.yRes * 3 * sizeof(float);
	render.pixelsDevice = clCreateBuffer(render.contextCL, CL_MEM_READ_WRITE, sizeBytes, NULL, &err);
	// if we aren't using interop, allocate another buffer on the device for output, on the pointer
	// for the texture
	if (render.glclInterop == 0) {
		render.pixelsTex = clCreateBuffer(render.contextCL, CL_MEM_READ_WRITE, sizeBytes, NULL, &err);
	}

	// finish texture initialization so that we can use with OpenCL if glclInterop
	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, image.xRes, image.yRes, 0, GL_RGB, GL_FLOAT, image.pixels);
	// Configure image from OpenGL texture "tex"
	if (render.glclInterop) {
		render.pixelsTex = clCreateFromGLTexture(render.contextCL, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, tex, &err);
		CheckOpenCLError(err, __LINE__);
	}


	// Create kernels
	render.renderMandelbrotKernel = clCreateKernel(program, "renderMandelbrotKernel", &err);
	CheckOpenCLError(err, __LINE__);
	render.gaussianBlurKernel = clCreateKernel(program, "gaussianBlurKernel", &err);
	CheckOpenCLError(err, __LINE__);
	render.gaussianBlurKernel2 = clCreateKernel(program, "gaussianBlurKernel2", &err);
	CheckOpenCLError(err, __LINE__);
#endif


	// Start main loop: Update until we encounter user input. Look for Esc key (quit), left and right mount
	// buttons (zoom in on cursor position, zoom out on cursor position), "r" -- reset back to initial coords,
	// "b" -- run some benchmarks, "p" -- display a double precision limited zoom.
	// Re-render the Mandelbrot set as and when we need, in the user input conditionals.

	// Initial render:
	RenderMandelbrot(&render, &image);

	while (!glfwWindowShouldClose(render.window)) {

		// draw
		glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0);
		// Swap buffers
		glfwSwapBuffers(render.window);


		// USER INPUT TESTS.
		// Continuous render, poll for input:
		//glfwPollEvents();
		// Render only on update, wait for input:
		glfwWaitEvents();


		// if user presses Esc, close window to leave loop
		if (glfwGetKey(render.window, GLFW_KEY_ESCAPE) == GLFW_PRESS) {
		    glfwSetWindowShouldClose(render.window, GL_TRUE);
		}


		// if user left-clicks in window, zoom in, centring on cursor position
		// if click and drag, simply re-position centre without zooming
		else if (glfwGetMouseButton(render.window, GLFW_MOUSE_BUTTON_LEFT) == GLFW_PRESS) {

			// Get Press cursor location
			double xPressPos, yPressPos, xReleasePos, yReleasePos;
			int shift = 0;
			glfwGetCursorPos(render.window, &xPressPos, &yPressPos);

			// Wait for mousebutton release, re-rendering as mouse moves
			while (glfwGetMouseButton(render.window, GLFW_MOUSE_BUTTON_LEFT) != GLFW_RELEASE) {

				glfwGetCursorPos(render.window, &xReleasePos, &yReleasePos);

				if (fabs(xReleasePos-xPressPos) > DRAGPIXELS || fabs(yReleasePos-yPressPos) > DRAGPIXELS) {
					// Set shift variable. Don't zoom after button release if this is 1
					shift = 1;
					// Determine shift in mandelbrot coords
					double xShift = (xReleasePos-xPressPos)/(double)image.xRes*(image.xMax-image.xMin);
					double yShift = (yReleasePos-yPressPos)/(double)image.yRes*(image.yMax-image.yMin);

					// Add shift to boundaries
					image.xMin = image.xMin - xShift;
					image.xMax = image.xMax - xShift;
					image.yMin = image.yMin - yShift;
					image.yMax = image.yMax - yShift;

					// Update "current" (press) position
					xPressPos = xReleasePos;
					yPressPos = yReleasePos;

					// Re-render and draw
					RenderMandelbrot(&render, &image);
					glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0);
					glfwSwapBuffers(render.window);
				}
				glfwPollEvents();
			}

			// else, zoom in smoothly over ZOOMSTEPS frames
			if (!shift) {
				SmoothZoom(&render, &image, RenderMandelbrot, xReleasePos, yReleasePos, ZOOMFACTOR, ITERSFACTOR);
			}
		}


		// if user right-clicks in window, zoom out, centring on cursor position
		else if (glfwGetMouseButton(render.window, GLFW_MOUSE_BUTTON_RIGHT) == GLFW_PRESS) {
			while (glfwGetMouseButton(render.window, GLFW_MOUSE_BUTTON_RIGHT) != GLFW_RELEASE) {
				glfwPollEvents();
			}

			// Get cursor position, in *screen coordinates*
			double xReleasePos, yReleasePos;
			glfwGetCursorPos(render.window, &xReleasePos, &yReleasePos);

			// Zooming out, so use 1/FACTORs.
			SmoothZoom(&render, &image, RenderMandelbrot, xReleasePos, yReleasePos, 1.0/ZOOMFACTOR, 1.0/ITERSFACTOR);
		}


		// if user presses "r", reset view
		else if (glfwGetKey(render.window, GLFW_KEY_R) == GLFW_PRESS) {
			while (glfwGetKey(render.window, GLFW_KEY_R) != GLFW_RELEASE) {
				glfwPollEvents();
			}
			printf("Resetting...\n");
			SetInitialValues(&image);
			RenderMandelbrot(&render, &image);
		}


		// if user presses "g", toggle gaussian blur
		else if (glfwGetKey(render.window, GLFW_KEY_G) == GLFW_PRESS) {
			while (glfwGetKey(render.window, GLFW_KEY_G) != GLFW_RELEASE) {
				glfwPollEvents();
			}
			if (image.gaussianBlur == 1) {
				printf("Toggling Gaussian Blur Off...\n");
				image.gaussianBlur = 0;
			}
			else {
				printf("Toggling Gaussian Blur On...\n");
				image.gaussianBlur = 1;
			}
			RenderMandelbrot(&render, &image);
		}


		// if user presses "q", decrease max iteration count
		else if (glfwGetKey(render.window, GLFW_KEY_Q) == GLFW_PRESS) {
			while (glfwGetKey(render.window, GLFW_KEY_Q) != GLFW_RELEASE) {
				glfwPollEvents();
			}
			printf("Decreasing max iteration count from %d to %d\n", image.maxIters, (int)(image.maxIters/ITERSFACTOR));
			image.maxIters /= ITERSFACTOR;
			RenderMandelbrot(&render, &image);
		}
		// if user presses "w", increase max iteration count
		else if (glfwGetKey(render.window, GLFW_KEY_W) == GLFW_PRESS) {
			while (glfwGetKey(render.window, GLFW_KEY_W) != GLFW_RELEASE) {
				glfwPollEvents();
			}
			printf("Increasing max iteration count from %d to %d\n", image.maxIters, (int)(image.maxIters*ITERSFACTOR));
			image.maxIters *= ITERSFACTOR;
			RenderMandelbrot(&render, &image);
		}


		// if user presses "a", decrease colour period
		else if (glfwGetKey(render.window, GLFW_KEY_A) == GLFW_PRESS) {
			while (glfwGetKey(render.window, GLFW_KEY_A) != GLFW_RELEASE) {
				glfwPollEvents();
			}
			printf("Decreasing colour period from %.0lf to %.0lf\n", image.colourPeriod, fmax(32, image.colourPeriod-32));
			image.colourPeriod = fmax(32, image.colourPeriod-32);
			RenderMandelbrot(&render, &image);
		}
		// if user presses "s", increase colour period
		else if (glfwGetKey(render.window, GLFW_KEY_S) == GLFW_PRESS) {
			while (glfwGetKey(render.window, GLFW_KEY_S) != GLFW_RELEASE) {
				glfwPollEvents();
			}
			printf("Increasing colour period from %.0lf to %.0lf\n", image.colourPeriod, image.colourPeriod+32);
			image.colourPeriod += 32;
			RenderMandelbrot(&render, &image);
		}


		// if user presses "b", run some benchmarks.
		else if (glfwGetKey(render.window, GLFW_KEY_B) == GLFW_PRESS) {
			while (glfwGetKey(render.window, GLFW_KEY_B) != GLFW_RELEASE) {
				glfwPollEvents();
			}

			printf("Running Benchmarks...\n");

			printf("Whole fractal:\n");
			SetInitialValues(&image);
			RunBenchmark(&render, &image, RenderMandelbrot);

			printf("Early Bail-out:\n");
			image.xMin = -0.8153143016681144;
			image.xMax = -0.6839170011300622;
			image.yMin = -0.0365167077914237;
			image.yMax =  0.0373942737612310;
			image.maxIters = 112;
			RunBenchmark(&render, &image, RenderMandelbrot);

			printf("Spiral:\n");
			image.xMin = -0.8673755781976442;
			image.xMax = -0.8673711898931797;
			image.yMin = -0.2156059883952151;
			image.yMax = -0.2156035199739536;
			image.maxIters = 1757;
			RunBenchmark(&render, &image, RenderMandelbrot);

			printf("Highly zoomed:\n");
			image.xMin = -0.8712903154956539;
			image.xMax = -0.8712903108993595;
			image.yMin = -0.2293516610223087;
			image.yMax = -0.2293516584368930;
			image.maxIters = 10750;
			RunBenchmark(&render, &image, RenderMandelbrot);

			printf("Complete.\n");
		// Re-render with original coords
			SetInitialValues(&image);
			RenderMandelbrot(&render, &image);
		}


		// if user presses "p", zoom in, such that the double precision algorithm looks pixellated
		else if (glfwGetKey(render.window, GLFW_KEY_P) == GLFW_PRESS) {
			while (glfwGetKey(render.window, GLFW_KEY_P) != GLFW_RELEASE) {
				glfwPollEvents();
			}
			printf("Precision test...\n");
			image.xMin = -1.25334325335487362;
			image.xMax = -1.25334325335481678;
			image.yMin = -0.34446232396119353;
			image.yMax = -0.34446232396116155;
			image.maxIters = 1389952;
			RenderMandelbrot(&render, &image);
		}


		// if user presses "h", render a high resolution version of the current view, and
		// save it to disk as an image
		else if (glfwGetKey(render.window, GLFW_KEY_H) == GLFW_PRESS) {
			while (glfwGetKey(render.window, GLFW_KEY_H) != GLFW_RELEASE) {
				glfwPollEvents();
			}
			double startTime = GetWallTime();
			printf("Saving high resolution (%d x %d) image...\n",
			       image.xRes*HIGHRESOLUTIONMULTIPLIER, image.yRes*HIGHRESOLUTIONMULTIPLIER);
			HighResolutionRender(&render, &image, RenderMandelbrot);
			printf("   --- done. Total time: %lfs\n", GetWallTime()-startTime);
		}
	}



	// clean up
#ifdef WITHOPENCL
	CleanUpCLEnvironment(&platform, &device_id, &(render.contextCL), &(render.queue), &program);
#endif

	glDeleteProgram(shaderProgram);
	glDeleteShader(fragmentShader);
	glDeleteShader(vertexShader);
	glDeleteBuffers(1, &ebo);
	glDeleteBuffers(1, &vbo);
	glDeleteVertexArrays(1, &vao);

	// Close OpenGL window and terminate GLFW
	glfwDestroyWindow(render.window);
	glfwTerminate();

	// Free dynamically allocated memory
	free(image.pixels);
	return 0;
}
Пример #4
0
void HighResolutionRender(renderStruct *render, imageStruct *image, RenderMandelbrotPtr RenderMandelbrot)
{
	// Set new resolution
	image->xRes = image->xRes*HIGHRESOLUTIONMULTIPLIER;
	image->yRes = image->yRes*HIGHRESOLUTIONMULTIPLIER;

	//Set updateTex flag to 0 so we don't try to draw it on screen.
	render->updateTex = 0;

	// Allocate new host pixels array of larger size. We need this regardless of interop
	free(image->pixels);
	// CAREFUL: these sizes can easily overflow a 32bit int. Use uint64_t
	uint64_t allocSize = image->xRes * image->yRes * sizeof*(image->pixels) *3;
	printf("   --- reallocating host pixels array: %.2lfMB\n", allocSize/1024.0/1024.0);
	image->pixels = malloc(allocSize);


#ifdef WITHOPENCL
	// Here, it is likely that the array(s) of colour values will not fit in device global memory.
	// We have to render the frame in tiles, each of which fits.
	//
	// OpenCL platform gives us a max allocation, which is "at least" 1/4 of the memory size. We
	// can't therefore, allocate two arrays of the max allocation -- they are not guaranteed to fit.
	// We allocate two arrays, each half the max allocation.
	cl_int err;

	// Determine the size (in bytes) of one row of pixels
	uint64_t rowSize = image->xRes * sizeof *(image->pixels) * 3;

	// Determine how many rows we can fit in half the max allocation
	uint64_t maxAllocRows = (render->deviceMaxAlloc/2) / rowSize;

	// The number of tiles required to render the frame:
	int tiles = (int)ceil((double)image->yRes/(double)maxAllocRows);
	// Size of a single tile
	uint64_t fullTileSize = maxAllocRows * rowSize;

	// Allocate tile-sized global memory arrays on device.
	// Store handle to OpenGL texture so it can be recovered later.
	// The struct variable will be reassigned, this makes running the kernels simpler.
	cl_mem keepPixelsTex = render->pixelsTex;
	// Release the existing pixels array
	err = clReleaseMemObject(render->pixelsDevice);
	CheckOpenCLError(err, __LINE__);
	// Allocate new arrays
	printf("   --- allocating tile arrays on device: %lfMB\n", 2.0*fullTileSize/1024.0/1024.0);
	render->pixelsDevice = clCreateBuffer(render->contextCL, CL_MEM_READ_WRITE, fullTileSize, NULL, &err);
	CheckOpenCLError(err, __LINE__);
	render->pixelsTex = clCreateBuffer(render->contextCL, CL_MEM_READ_WRITE, fullTileSize, NULL, &err);
	CheckOpenCLError(err, __LINE__);

	// Store full image resolution and boundaries, we will adjust the values in the struct
	int yResFull = image->yRes;
	double yMinFull = image->yMin;
	double yMaxFull = image->yMax;


	// Render tiles, and copy data back to the host array
	for (int t = 0; t < tiles; t++) {
		printf("   --- computing tile %d/%d...\n", t+1, tiles);

		// Set tile resolution. Each has maxAllocRows apart from the last,
		// which might have fewer as it contains the remainder.
		image->yRes = maxAllocRows;
		if (t == tiles-1) {
			image->yRes = yResFull % maxAllocRows;
		}
		// Reset global size, as the resolution has changed
		render->globalSize = image->yRes * image->xRes;
		assert(render->globalSize % render->localSize == 0);

		// Set tile boundaries. We are computing a fraction maxAllocRows/yResFull of the full
		//image, starting at the beginning of the t-th tile. The final tile uses xMaxFull for xMax.
		image->yMin = yMinFull + t*((double)maxAllocRows/(double)yResFull)*(yMaxFull-yMinFull);
		image->yMax = yMinFull + (t+1)*((double)maxAllocRows/(double)yResFull)*(yMaxFull-yMinFull);
		if (t == tiles-1) {
			image->yMax = yMaxFull;
		}

		// Render
		RenderMandelbrot(render, image);

		// Copy data from render->pixelsTex (the output of GaussianBlurKernel2)
		uint64_t storeOffset = t * (maxAllocRows * image->xRes * 3);
		// The final tile is smaller
		if (t == tiles-1) {
			fullTileSize = image->yRes * image->xRes * sizeof *(image->pixels) * 3;
		}
		err = clEnqueueReadBuffer(render->queue, render->pixelsTex, CL_TRUE, 0, fullTileSize,
		                          &(image->pixels[storeOffset]), 0, NULL, NULL);
		CheckOpenCLError(err, __LINE__);
	}

	// Reset image boundaries and resolution
	image->yRes = yResFull;
	image->yMin = yMinFull;
	image->yMax = yMaxFull;


#else
	// If not using OpenCL, simply render directly onto the reallocated image->pixels array
	RenderMandelbrot(render, image);
#endif


	// Save png
	printf("   --- creating png...\n");
	// Create raw pixel array
	GLubyte * rawPixels;
	uint64_t rawAllocSize = image->xRes * image->yRes * sizeof *rawPixels *3;
	rawPixels = malloc(rawAllocSize);
	for (uint64_t i = 0; i < image->xRes*image->yRes*3; i+=3) {
		// note change in order, rgb -> bgr!
		rawPixels[i+0] = (GLubyte)((image->pixels)[i+2]*255);
		rawPixels[i+1] = (GLubyte)((image->pixels)[i+1]*255);
		rawPixels[i+2] = (GLubyte)((image->pixels)[i+0]*255);
	}
	FIBITMAP* img = FreeImage_ConvertFromRawBits(rawPixels, image->xRes, image->yRes, 3*image->xRes,
	                                             24, 0x000000, 0x000000, 0x000000, TRUE);
	FreeImage_Save(FIF_PNG, img, "test.png", 0);
	FreeImage_Unload(img);
	free(rawPixels);


	// Reset original values to continue with live rendering
	image->xRes = image->xRes/HIGHRESOLUTIONMULTIPLIER;
	image->yRes = image->yRes/HIGHRESOLUTIONMULTIPLIER;
	render->updateTex = 1;
	free(image->pixels);
	image->pixels = malloc(image->xRes * image->yRes * sizeof *(image->pixels) *3);

#ifdef WITHOPENCL
	// Release large global memory buffers
	clReleaseMemObject(render->pixelsDevice);
	clReleaseMemObject(render->pixelsTex);
	// Recover handle to OpenGL texture
	render->pixelsTex = keepPixelsTex;
	size_t allocSizeOrig = image->xRes * image->yRes * 3 * sizeof *(image->pixels);
	render->pixelsDevice = clCreateBuffer(render->contextCL, CL_MEM_READ_WRITE, allocSizeOrig, NULL, &err);
	CheckOpenCLError(err, __LINE__);
	// Reset global size, as the resolution has changed
	render->globalSize = image->yRes * image->xRes;
	assert(render->globalSize % render->localSize == 0);
#endif

}
Пример #5
0
void PGR_radiosity::releaseCL()
{
    /* Releases OpenCL resources (Context, Memory etc.) */
    cl_int status;

    status = clReleaseKernel(this->sortKernel);
    CheckOpenCLError(status, "clReleaseKernel sortKernel.");
    status = clReleaseKernel(this->radiosityKernel);
    CheckOpenCLError(status, "clReleaseKernel radiosityKernel.");

    status = clReleaseMemObject(this->indicesCL);
    CheckOpenCLError(status, "clReleaseMemObject indicesCL");
    status = clReleaseMemObject(this->patchesColorsCL);
    CheckOpenCLError(status, "clReleaseMemObject patchesCL");
    status = clReleaseMemObject(this->patchesGeoCL);
    CheckOpenCLError(status, "clReleaseMemObject patchesGeometryCL");
    status = clReleaseMemObject(this->indicesCountCL);
    CheckOpenCLError(status, "clReleaseMemObject indicesCountCL");
    status = clReleaseMemObject(this->patchesEnergiesCL);
    CheckOpenCLError(status, "clReleaseMemObject patchesEnergiesCL");
    status = clReleaseMemObject(this->maximalEnergyCL);
    CheckOpenCLError(status, "clReleaseMemObject maximalEnergyCL");
    status = clReleaseMemObject(this->diffColorsCL);
    CheckOpenCLError(status, "clReleaseMemObject diffColorsCL");
    status = clReleaseMemObject(this->intensitiesCL);
    CheckOpenCLError(status, "clReleaseMemObject intensitiesCL");
    status = clReleaseMemObject(this->texturesCL);
    CheckOpenCLError(status, "clReleaseMemObject texturesCL");

    status = clReleaseProgram(this->program);
    CheckOpenCLError(status, "clReleaseProgram.");

    status = clReleaseCommandQueue(this->queue);
    CheckOpenCLError(status, "clReleaseCommandQueue.");

    status = clReleaseContext(this->context);
    CheckOpenCLError(status, "clReleaseContext.");

    delete [] this->raw_patchesColors;
    delete [] this->raw_patchesGeo;
    delete [] this->raw_patchesEnergies;
    delete [] this->raw_intensities;
    delete [] this->raw_diffColors;
    delete [] this->raw_textures;
}
Пример #6
0
void PGR_radiosity::runRadiosityKernelCL()
{
    int status;
    int cycles = 0;

    /* Events */
    cl_event event_radiosity, event_sort, event_maximalEnergy, event_indices, event_textures, event_indicesCount;

    float maximalEnergy;

    maximalEnergy = (float) this->model->getMaximalEnergy();

    /* Setup arguments to the radiosity kernel */
    status = clSetKernelArg(this->radiosityKernel, 0, sizeof (cl_mem), &this->patchesGeoCL);
    CheckOpenCLError(status, "clSetKernelArg. (patchesCL)");

    status = clSetKernelArg(this->radiosityKernel, 1, sizeof (cl_mem), &this->patchesColorsCL);
    CheckOpenCLError(status, "clSetKernelArg. (patchesCL)");

    cl_uint patchesCount = (cl_uint)this->model->getPatchesCount();
    status = clSetKernelArg(this->radiosityKernel, 2, sizeof (cl_uint), &patchesCount);
    CheckOpenCLError(status, "clSetKernelArg. (patchesCount)");

    /* Set initial energy */
    this->raw_indices = new cl_uint[this->workGroupSize];
    cl_uint indicesCount = this->model->getIdsOfNMostEnergizedPatchesCL(this->raw_indices, this->workGroupSize, LIMIT);

    status = clEnqueueWriteBuffer(this->queue,
                                  this->indicesCountCL,
                                  CL_TRUE, //blocking write
                                  0,
                                  sizeof (cl_uint),
                                  &indicesCount,
                                  0,
                                  0,
                                  0);
    CheckOpenCLError(status, "Copy indicesCount to GPU");

    status = clEnqueueWriteBuffer(this->queue,
                                  this->indicesCL,
                                  CL_TRUE, //blocking write
                                  0,
                                  indicesCount * sizeof (cl_uint),
                                  this->raw_indices,
                                  0,
                                  0,
                                  0);
    CheckOpenCLError(status, "Copy indices to GPU");

    status = clSetKernelArg(this->radiosityKernel, 3, sizeof (cl_mem), &this->indicesCL);
    CheckOpenCLError(status, "clSetKernelArg. (indicesCL)");

    status = clSetKernelArg(this->radiosityKernel, 4, sizeof (cl_mem), &this->indicesCountCL);
    CheckOpenCLError(status, "clSetKernelArg. (indicesCount)");

    status = clSetKernelArg(this->radiosityKernel, 5, sizeof (cl_mem), &this->patchesEnergiesCL);
    CheckOpenCLError(status, "clSetKernelArg. (patchesCL)");

    status = clSetKernelArg(this->radiosityKernel, 6, sizeof (cl_mem), &this->diffColorsCL);
    CheckOpenCLError(status, "clSetKernelArg. (diffColorsCL)");

    status = clSetKernelArg(this->radiosityKernel, 7, sizeof (cl_mem), &this->intensitiesCL);
    CheckOpenCLError(status, "clSetKernelArg. (intensitiesCL)");

    this->raw_textures = new cl_uchar3[768 * 256 * this->workGroupSize];
    this->model->getTextureCL(this->raw_textures, this->raw_indices, indicesCount);
    status = clEnqueueWriteBuffer(this->queue,
                                  this->texturesCL,
                                  CL_TRUE, //blocking write
                                  0,
                                  768 * 256 * indicesCount * sizeof (cl_uchar3),
                                  this->raw_textures,
                                  0,
                                  0,
                                  0);
    CheckOpenCLError(status, "Copy textures to GPU");

    status = clSetKernelArg(this->radiosityKernel, 8, sizeof (cl_mem), &this->texturesCL);
    CheckOpenCLError(status, "clSetKernelArg. (texturesCL)");

    status = clSetKernelArg(this->radiosityKernel, 9, sizeof (cl_mem), &this->visitedCL);
    CheckOpenCLError(status, "clSetKernelArg. (visitedCL)");

    size_t globalThreadsMain[] = {this->workGroupSize};
    size_t localThreadsMain[] = {this->workGroupSize};

    /* Set arguments for sort kernel */
    status = clSetKernelArg(this->sortKernel, 0, sizeof (cl_mem), &this->patchesEnergiesCL);
    CheckOpenCLError(status, "clSetKernelArg. (patchesCL)");

    status = clSetKernelArg(this->sortKernel, 1, sizeof (cl_uint), &patchesCount);
    CheckOpenCLError(status, "clSetKernelArg. (patchesCount)");

    status = clSetKernelArg(this->sortKernel, 2, sizeof (cl_mem), &this->indicesCL);
    CheckOpenCLError(status, "clSetKernelArg. (indicesCL)");

    status = clSetKernelArg(this->sortKernel, 3, sizeof (cl_mem), &this->indicesCountCL);
    CheckOpenCLError(status, "clSetKernelArg. (indicesCount)");

    status = clSetKernelArg(this->sortKernel, 4, sizeof (cl_uint), &this->workGroupSize);
    CheckOpenCLError(status, "clSetKernelArg. (n)");

    float limit = LIMIT;
    status = clSetKernelArg(this->sortKernel, 5, sizeof (cl_float), &limit);
    CheckOpenCLError(status, "clSetKernelArg. (limit)");

    status = clSetKernelArg(this->sortKernel, 6, sizeof (cl_mem), &this->maximalEnergyCL);
    CheckOpenCLError(status, "clSetKernelArg. (maximalEnergy)");

    size_t globalThreadsSort[] = {1}; //only one kernel computes
    size_t localThreadsSort[] = {1};

    cl_bool* zeroVisited = new cl_bool[this->maxWorkGroupSize * this->model->getPatchesCount()];
    memset(zeroVisited, 0, this->maxWorkGroupSize * this->model->getPatchesCount() * sizeof (cl_bool));

    debug_log = false;
    while (maximalEnergy > LIMIT)
    {
        cout << cycles << " energy: " << maximalEnergy << endl;
        cycles++;

        /* Start kernel - radiosity step*/
        status = clEnqueueNDRangeKernel(this->queue,
                                        this->radiosityKernel,
                                        1, //1D
                                        NULL, //offset
                                        globalThreadsMain,
                                        localThreadsMain,
                                        0,
                                        NULL,
                                        &event_radiosity);

        CheckOpenCLError(status, "clEnqueueNDRangeKernel radiosityKernel.");

        /* Start kernel - recompute indices array */
        status = clEnqueueNDRangeKernel(this->queue,
                                        this->sortKernel,
                                        1, //1D
                                        NULL, //offset
                                        globalThreadsSort,
                                        localThreadsSort,
                                        1,
                                        &event_radiosity,
                                        &event_sort);
        CheckOpenCLError(status, "clEnqueueNDRangeKernel sortKernel.");


        /* Read maximal energy from buffer */
        status = clEnqueueReadBuffer(this->queue,
                                     this->maximalEnergyCL,
                                     CL_TRUE, //blocking write
                                     0,
                                     sizeof (cl_float),
                                     &maximalEnergy,
                                     1,
                                     &event_sort,
                                     &event_maximalEnergy);
        CheckOpenCLError(status, "Read maximal energy");

        status = clEnqueueReadBuffer(this->queue,
                                     this->indicesCountCL,
                                     CL_TRUE, //blocking write
                                     0,
                                     sizeof (cl_uint),
                                     &indicesCount,
                                     1,
                                     &event_sort,
                                     &event_indicesCount);
        CheckOpenCLError(status, "Read indices count");

        status = clWaitForEvents(1, &event_indicesCount);
        CheckOpenCLError(status, "clWaitForEvents read indices count.");

        if (indicesCount == 0)
            break;

        status = clEnqueueReadBuffer(this->queue,
                                     this->indicesCL,
                                     CL_TRUE, //blocking write
                                     0,
                                     indicesCount * sizeof (cl_uint),
                                     this->raw_indices,
                                     0,
                                     NULL,
                                     &event_indices);
        CheckOpenCLError(status, "Read indices");

        status = clWaitForEvents(1, &event_indices);
        CheckOpenCLError(status, "clWaitForEvents read Indices");

        this->model->getTextureCL(this->raw_textures, this->raw_indices, indicesCount);
        status = clEnqueueWriteBuffer(this->queue,
                                      this->texturesCL,
                                      CL_TRUE, //blocking write
                                      0,
                                      768 * 256 * indicesCount * sizeof (cl_uchar3),
                                      this->raw_textures,
                                      0,
                                      NULL,
                                      &event_textures);
        CheckOpenCLError(status, "Copy textures to GPU");
        status = clWaitForEvents(1, &event_textures);
        CheckOpenCLError(status, "clWaitForEvents write textures.");


        status = clEnqueueWriteBuffer(this->queue,
                                      this->visitedCL,
                                      CL_TRUE, //blocking write
                                      0,
                                      this->model->getPatchesCount() * sizeof (cl_bool),
                                      zeroVisited,
                                      0,
                                      0,
                                      0);
        CheckOpenCLError(status, "Clear visited flags");


        status = clWaitForEvents(1, &event_maximalEnergy);
        CheckOpenCLError(status, "clWaitForEvents read Maximal energy.");
    }

    delete [] zeroVisited;
    cout << "cycles: " << cycles << endl;
    debug_log = true;
}
Пример #7
0
int PGR_radiosity::prepareCL()
{
    cl_int ciErr = CL_SUCCESS;

    // Get Platform
    cl_platform_id *cpPlatforms;
    cl_uint cuiPlatformsCount;
    ciErr = clGetPlatformIDs(0, NULL, &cuiPlatformsCount);
    this->CheckOpenCLError(ciErr, "clGetPlatformIDs: cuiPlatformsNum=%i", cuiPlatformsCount);
    cpPlatforms = (cl_platform_id*) malloc(cuiPlatformsCount * sizeof (cl_platform_id));
    ciErr = clGetPlatformIDs(cuiPlatformsCount, cpPlatforms, NULL);
    this->CheckOpenCLError(ciErr, "clGetPlatformIDs");

    cl_platform_id platform = 0;

    const unsigned int TMP_BUFFER_SIZE = 1024;
    char sTmp[TMP_BUFFER_SIZE];

    for (unsigned int f0 = 0; f0 < cuiPlatformsCount; f0++)
    {
        //bool shouldBrake = false;
        ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_PROFILE, TMP_BUFFER_SIZE, sTmp, NULL);
        this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_PROFILE=%s", f0, sTmp);
        ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_VERSION, TMP_BUFFER_SIZE, sTmp, NULL);
        this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_VERSION=%s", f0, sTmp);
        ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_NAME, TMP_BUFFER_SIZE, sTmp, NULL);
        this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_NAME=%s", f0, sTmp);
        ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_VENDOR, TMP_BUFFER_SIZE, sTmp, NULL);
        this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_VENDOR=%s", f0, sTmp);

        //prioritize AMD and CUDA platforms

        if ((strcmp(sTmp, "NVIDIA Corporation") == 0))
        {
            platform = cpPlatforms[f0];
        }

        //        if ((strcmp(sTmp, "Advanced Micro Devices, Inc.") == 0))
        //        {
        //            platform = cpPlatforms[f0];
        //        }

        //prioritize Intel
        /*if ((strcmp(sTmp, "Intel(R) Corporation") == 0)) {
            platform = cpPlatforms[f0];
        }*/

        ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_EXTENSIONS, TMP_BUFFER_SIZE, sTmp, NULL);
        this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_EXTENSIONS=%s", f0, sTmp);
    }

    if (platform == 0)
    { //no prioritized found
        if (cuiPlatformsCount > 0)
        {
            platform = cpPlatforms[0];
        }
        else
        {
            cerr << "No device was found" << endl;
            return -1;
        }
    }
    // Get Devices
    cl_uint cuiDevicesCount;
    ciErr = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &cuiDevicesCount);
    CheckOpenCLError(ciErr, "clGetDeviceIDs: cuiDevicesCount=%i", cuiDevicesCount);
    cl_device_id *cdDevices = (cl_device_id*) malloc(cuiDevicesCount * sizeof (cl_device_id));
    ciErr = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, cuiDevicesCount, cdDevices, NULL);
    CheckOpenCLError(ciErr, "clGetDeviceIDs");

    unsigned int deviceIndex = 0;

    for (unsigned int f0 = 0; f0 < cuiDevicesCount; f0++)
    {
        cl_device_type cdtTmp;
        size_t iDim[3];

        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_TYPE, sizeof (cdtTmp), &cdtTmp, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_TYPE=%s%s%s%s", f0, cdtTmp & CL_DEVICE_TYPE_CPU ? "CPU," : "",
                         cdtTmp & CL_DEVICE_TYPE_GPU ? "GPU," : "",
                         cdtTmp & CL_DEVICE_TYPE_ACCELERATOR ? "ACCELERATOR," : "",
                         cdtTmp & CL_DEVICE_TYPE_DEFAULT ? "DEFAULT," : "");

        if (cdtTmp & CL_DEVICE_TYPE_GPU)
        { //prioritize gpu if both cpu and gpu are available
            deviceIndex = f0;
        }

        cl_bool bTmp;
        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_AVAILABLE, sizeof (bTmp), &bTmp, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_AVAILABLE=%s", f0, bTmp ? "YES" : "NO");
        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_NAME, TMP_BUFFER_SIZE, sTmp, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_NAME=%s", f0, sTmp);
        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_VENDOR, TMP_BUFFER_SIZE, sTmp, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_VENDOR=%s", f0, sTmp);
        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DRIVER_VERSION, TMP_BUFFER_SIZE, sTmp, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DRIVER_VERSION=%s", f0, sTmp);
        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_PROFILE, TMP_BUFFER_SIZE, sTmp, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_PROFILE=%s", f0, sTmp);
        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_VERSION, TMP_BUFFER_SIZE, sTmp, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_VERSION=%s", f0, sTmp);
        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof (iDim), iDim, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_MAX_WORK_ITEM_SIZES=%ix%ix%i", f0, iDim[0], iDim[1], iDim[2]);
        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof (size_t), iDim, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_MAX_WORK_GROUP_SIZE=%i", f0, iDim[0]);
        ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_EXTENSIONS, TMP_BUFFER_SIZE, sTmp, NULL);
        CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_EXTENSIONS=%s", f0, sTmp);
    }

    cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platform, 0};



    /* Create context */
    this->context = clCreateContext(cps, 1, &cdDevices[deviceIndex], NULL, NULL, &ciErr);
    CheckOpenCLError(ciErr, "clCreateContext");

    /* Create a command queue */
    this->queue = clCreateCommandQueue(this->context, cdDevices[deviceIndex], CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &ciErr);
    CheckOpenCLError(ciErr, "clCreateCommandQueue");

    /* Create and compile and openCL program */
    char *cSourceCL = loadProgSource("kernels.cl");

    this->program = clCreateProgramWithSource(this->context, 1, (const char **) &cSourceCL, NULL, &ciErr);
    CheckOpenCLError(ciErr, "clCreateProgramWithSource");
    free(cSourceCL);

    ciErr = clBuildProgram(this->program, 0, NULL, NULL, NULL, NULL);
    CheckOpenCLError(ciErr, "clBuildProgram");

    cl_int logStatus;
    char *buildLog = NULL;
    size_t buildLogSize = 0;
    logStatus = clGetProgramBuildInfo(this->program,
                                      cdDevices[deviceIndex],
                                      CL_PROGRAM_BUILD_LOG,
                                      buildLogSize,
                                      buildLog,
                                      &buildLogSize);

    CheckOpenCLError(logStatus, "clGetProgramBuildInfo.");

    buildLog = (char*) malloc(buildLogSize);
    if (buildLog == NULL)
    {
        printf("Failed to allocate host memory. (buildLog)");
        return -1;
    }
    memset(buildLog, 0, buildLogSize);

    logStatus = clGetProgramBuildInfo(this->program,
                                      cdDevices[deviceIndex],
                                      CL_PROGRAM_BUILD_LOG,
                                      buildLogSize,
                                      buildLog,
                                      NULL);
    CheckOpenCLError(logStatus, "clGetProgramBuildInfo.");
    free(buildLog);

    size_t tempKernelWorkGroupSize;

    /* Create kernels */
    this->radiosityKernel = clCreateKernel(program, "radiosity", &ciErr);
    CheckOpenCLError(ciErr, "clCreateKernel radiosity");
    this->sortKernel = clCreateKernel(program, "sort", &ciErr);
    CheckOpenCLError(ciErr, "clCreateKernel sort");

    this->maxWorkGroupSize = 64;
    this->workGroupSize = 64;

    ciErr = clGetKernelWorkGroupInfo(this->radiosityKernel,
                                     cdDevices[deviceIndex],
                                     CL_KERNEL_WORK_GROUP_SIZE,
                                     sizeof (size_t),
                                     &tempKernelWorkGroupSize,
                                     0);
    CheckOpenCLError(ciErr, "clGetKernelInfo");
    this->maxWorkGroupSize = MIN(tempKernelWorkGroupSize, this->maxWorkGroupSize);

    if (this->workGroupSize > this->maxWorkGroupSize)
    {
        cout << "Out of Resources!" << endl;
        cout << "Group Size specified: " << this->workGroupSize << endl;
        cout << "Max Group Size supported on the kernel: " << this->maxWorkGroupSize << endl;
        cout << "Falling back to " << this->maxWorkGroupSize << endl;

        this->workGroupSize = this->maxWorkGroupSize;
    }


    /* Allocate buffer of colors */
    this->patchesColorsCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->model->getPatchesCount() * sizeof (cl_uchar3), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer patchesCL");

    this->raw_patchesColors = new cl_uchar3[this->model->getPatchesCount()];
    this->raw_patchesEnergies = new cl_float[this->model->getPatchesCount()];
    this->raw_diffColors = new cl_uchar3[this->model->getPatchesCount()];
    this->raw_intensities = new cl_float[this->model->getPatchesCount()];
    this->model->getPatchesCL(this->raw_patchesColors, this->raw_patchesEnergies);

    ciErr = clEnqueueWriteBuffer(this->queue,
                                 this->patchesColorsCL,
                                 CL_TRUE, //blocking write
                                 0,
                                 this->model->getPatchesCount() * sizeof (cl_uchar3),
                                 this->raw_patchesColors,
                                 0,
                                 0,
                                 0);
    CheckOpenCLError(ciErr, "Copy patches colors");

    /* Alocate buffer of energies */
    this->patchesEnergiesCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->model->getPatchesCount() * sizeof (cl_float), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer patchesCL");

    ciErr = clEnqueueWriteBuffer(this->queue,
                                 this->patchesEnergiesCL,
                                 CL_TRUE, //blocking write
                                 0,
                                 this->model->getPatchesCount() * sizeof (cl_float),
                                 this->raw_patchesEnergies,
                                 0,
                                 0,
                                 0);
    CheckOpenCLError(ciErr, "Copy patches");

    /* Allocate buffer of patches geometry */
    this->patchesGeoCL = clCreateBuffer(this->context, CL_MEM_READ_ONLY, this->model->getPatchesCount() * sizeof (cl_float8), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer patchesGeometryCL");

    this->raw_patchesGeo = new cl_float8[this->model->getPatchesCount()];
    this->model->getPatchesGeometryCL(raw_patchesGeo);
    ciErr = clEnqueueWriteBuffer(this->queue,
                                 this->patchesGeoCL,
                                 CL_TRUE, //blocking write
                                 0,
                                 this->model->getPatchesCount() * sizeof (cl_float8),
                                 this->raw_patchesGeo,
                                 0,
                                 0,
                                 0);
    CheckOpenCLError(ciErr, "Copy patches geometry");


    this->indicesCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->maxWorkGroupSize * sizeof (cl_uint), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer indicesCL");

    this->indicesCountCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, sizeof (cl_uint), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer indicesCountCL");

    this->maximalEnergyCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, sizeof (cl_float), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer maximalEnergyCL");

    this->diffColorsCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->model->getPatchesCount() * sizeof (cl_uchar3), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer diffColorsCL");

    cl_uchar3* zeros = new cl_uchar3[this->model->getPatchesCount()];
    memset(zeros, 0, this->model->getPatchesCount() * sizeof (cl_uchar3));
    ciErr = clEnqueueWriteBuffer(this->queue,
                                 this->diffColorsCL,
                                 CL_TRUE, //blocking write
                                 0,
                                 this->model->getPatchesCount() * sizeof (cl_uchar3),
                                 zeros,
                                 0,
                                 0,
                                 0);
    CheckOpenCLError(ciErr, "Clear diff colors");
    delete [] zeros;

    this->intensitiesCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->model->getPatchesCount() * sizeof (cl_float), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer intensitiesCL");

    cl_float* zeroIntensity = new cl_float[this->model->getPatchesCount()];
    memset(zeroIntensity, 0, this->model->getPatchesCount() * sizeof (cl_float));
    ciErr = clEnqueueWriteBuffer(this->queue,
                                 this->intensitiesCL,
                                 CL_TRUE, //blocking write
                                 0,
                                 this->model->getPatchesCount() * sizeof (cl_float),
                                 zeroIntensity,
                                 0,
                                 0,
                                 0);
    CheckOpenCLError(ciErr, "Clear intensities");
    delete [] zeroIntensity;

    this->texturesCL = clCreateBuffer(this->context, CL_MEM_READ_ONLY, this->maxWorkGroupSize * 768 * 256 * sizeof (cl_uchar3), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer texturesCL");

    this->visitedCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->maxWorkGroupSize * this->model->getPatchesCount() * sizeof (cl_bool), 0, &ciErr);
    CheckOpenCLError(ciErr, "CreateBuffer visitedCL");

    cl_bool* zeroVisited = new cl_bool[this->maxWorkGroupSize * this->model->getPatchesCount()];
    memset(zeroVisited, 0, this->maxWorkGroupSize * this->model->getPatchesCount() * sizeof (cl_bool));
    ciErr = clEnqueueWriteBuffer(this->queue,
                                 this->visitedCL,
                                 CL_TRUE, //blocking write
                                 0,
                                 this->model->getPatchesCount() * sizeof (cl_bool),
                                 zeroVisited,
                                 0,
                                 0,
                                 0);
    CheckOpenCLError(ciErr, "Clear visited flags");
    delete [] zeroVisited;

    free(cdDevices);

    return 0;
}