void PGR_radiosity::computeRadiosityCL() { /* Prepare CL structures */ if (this->prepareCL() != 0) { this->releaseCL(); return; } /* Run OpenCL kernel that computes radiosity. It includes a loop */ this->runRadiosityKernelCL(); clFinish(this->queue); int status = clEnqueueReadBuffer(this->queue, this->diffColorsCL, CL_TRUE, //blocking read 0, this->model->getPatchesCount() * sizeof (cl_uchar3), this->raw_diffColors, 0, 0, 0); CheckOpenCLError(status, "Read diffColors."); status = clEnqueueReadBuffer(this->queue, this->intensitiesCL, CL_TRUE, //blocking write 0, this->model->getPatchesCount() * sizeof (cl_float), this->raw_intensities, 0, 0, 0); CheckOpenCLError(status, "Read intensities."); /* Decode opencl memory objects */ this->model->decodeData(this->raw_diffColors, this->raw_intensities, this->model->getPatchesCount()); this->model->recomputeColors(); this->model->updateArrays(); /* Release CL structures */ this->releaseCL(); }
// OpenCL functions int InitialiseCLEnvironment(cl_platform_id **platform, cl_device_id ***device_id, cl_program *program, renderStruct *render) { // error flag cl_int err; char infostring[1024]; char deviceInfo[1024]; // need to ensure platform supports OpenGL OpenCL interop before querying devices // to avoid segfault when calling clGetGLContextInfoKHR int *platformSupportsInterop; //get kernel from file FILE* kernelFile = fopen(kernelFileName, "rb"); fseek(kernelFile, 0, SEEK_END); long fileLength = ftell(kernelFile); rewind(kernelFile); char *kernelSource = malloc(fileLength*sizeof(char)); long read = fread(kernelSource, sizeof(char), fileLength, kernelFile); if (fileLength != read) printf("Error reading kernel file, line %d\n", __LINE__); fclose(kernelFile); //get platform and device information cl_uint numPlatforms; err = clGetPlatformIDs(0, NULL, &numPlatforms); *platform = malloc(numPlatforms * sizeof(cl_platform_id)); *device_id = malloc(numPlatforms * sizeof(cl_device_id*)); platformSupportsInterop = malloc(numPlatforms * sizeof(*platformSupportsInterop)); err |= clGetPlatformIDs(numPlatforms, *platform, NULL); CheckOpenCLError(err, __LINE__); cl_uint *numDevices; numDevices = malloc(numPlatforms * sizeof(cl_uint)); for (cl_uint i = 0; i < numPlatforms; i++) { clGetPlatformInfo((*platform)[i], CL_PLATFORM_VENDOR, sizeof(infostring), infostring, NULL); printf("\n---OpenCL: Platform Vendor %d: %s\n", i, infostring); err = clGetDeviceIDs((*platform)[i], CL_DEVICE_TYPE_ALL, 0, NULL, &(numDevices[i])); CheckOpenCLError(err, __LINE__); (*device_id)[i] = malloc(numDevices[i] * sizeof(cl_device_id)); platformSupportsInterop[i] = 0; err = clGetDeviceIDs((*platform)[i], CL_DEVICE_TYPE_ALL, numDevices[i], (*device_id)[i], NULL); CheckOpenCLError(err, __LINE__); for (cl_uint j = 0; j < numDevices[i]; j++) { char deviceName[200]; clGetDeviceInfo((*device_id)[i][j], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL); printf("---OpenCL: Device found %d. %s\n", j, deviceName); clGetDeviceInfo((*device_id)[i][j], CL_DEVICE_EXTENSIONS, sizeof(deviceInfo), deviceInfo, NULL); if (strstr(deviceInfo, "cl_khr_gl_sharing") != NULL) { printf("---OpenCL: cl_khr_gl_sharing supported!\n"); platformSupportsInterop[i] = 1; } else { printf("---OpenCL: cl_khr_gl_sharing NOT supported!\n"); platformSupportsInterop[i] |= 0; } if (strstr(deviceInfo, "cl_khr_fp64") != NULL) { printf("---OpenCL: cl_khr_fp64 supported!\n"); } else { printf("---OpenCL: cl_khr_fp64 NOT supported!\n"); } } } printf("\n"); //////////////////////////////// // This part is different to how we usually do things. Need to get context and device from existing // OpenGL context. Loop through all platforms looking for the device: cl_device_id device = NULL; int deviceFound = 0; cl_uint checkPlatform = 0; #ifdef TRYINTEROP while (!deviceFound) { if (platformSupportsInterop[checkPlatform]) { printf("---OpenCL: Looking for OpenGL Context device on platform %d ... ", checkPlatform); clGetGLContextInfoKHR_fn pclGetGLContextInfoKHR; PTR_FUNC_PTR pclGetGLContextInfoKHR = clGetExtensionFunctionAddressForPlatform((*platform)[checkPlatform], "clGetGLContextInfoKHR"); cl_context_properties properties[] = { CL_GL_CONTEXT_KHR, (cl_context_properties) glfwGetGLXContext(render->window), CL_GLX_DISPLAY_KHR, (cl_context_properties) glfwGetX11Display(), CL_CONTEXT_PLATFORM, (cl_context_properties) (*platform)[checkPlatform], 0}; err = pclGetGLContextInfoKHR(properties, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, sizeof(cl_device_id), &device, NULL); if (err != CL_SUCCESS) { printf("Not Found.\n"); checkPlatform++; if (checkPlatform > numPlatforms-1) { printf("---OpenCL: Error! Could not find OpenGL sharing device.\n"); deviceFound = 1; render->glclInterop = 0; } } else { printf("Found!\n"); deviceFound = 1; render->glclInterop = 1; } } else { checkPlatform++; } } if (render->glclInterop) { // Check the device we've found supports double precision clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, sizeof(deviceInfo), deviceInfo, NULL); if (strstr(deviceInfo, "cl_khr_fp64") == NULL) { printf("---OpenCL: Interop device doesn't support double precision! We cannot use it.\n"); } else { cl_context_properties properties[] = { CL_GL_CONTEXT_KHR, (cl_context_properties) glfwGetGLXContext(render->window), CL_GLX_DISPLAY_KHR, (cl_context_properties) glfwGetX11Display(), CL_CONTEXT_PLATFORM, (cl_context_properties) (*platform)[checkPlatform], 0}; render->contextCL = clCreateContext(properties, 1, &device, NULL, 0, &err); CheckOpenCLError(err, __LINE__); } } #endif // if render->glclInterop is 0, either we are not trying to use it, we couldn't find an interop // device, or we found an interop device but it doesn't support double precision. // In these cases, have the user choose a platform and device manually. if (!(render->glclInterop)) { printf("Choose a platform and device.\n"); checkPlatform = numPlatforms; while (checkPlatform >= numPlatforms) { printf("Platform: "); scanf("%u", &checkPlatform); if (checkPlatform >= numPlatforms) { printf("Invalid Platform choice.\n"); } } cl_uint chooseDevice = numDevices[checkPlatform]; while (chooseDevice >= numDevices[checkPlatform]) { printf("Device: "); scanf("%u", &chooseDevice); if (chooseDevice >= numDevices[checkPlatform]) { printf("Invalid Device choice.\n"); } else { // Check the device we've chosen supports double precision clGetDeviceInfo((*device_id)[checkPlatform][chooseDevice], CL_DEVICE_EXTENSIONS, sizeof(deviceInfo), deviceInfo, NULL); if (strstr(deviceInfo, "cl_khr_fp64") == NULL) { printf("---OpenCL: Interop device doesn't support double precision! We cannot use it.\n"); chooseDevice = numDevices[checkPlatform]; } } } // Create non-interop context render->contextCL = clCreateContext(NULL, 1, &((*device_id)[checkPlatform][chooseDevice]), NULL, NULL, &err); device = (*device_id)[checkPlatform][chooseDevice]; } //////////////////////////////// // device is now fixed. Query its max global memory allocation size and store it, used in // HighResolutionRender routine, to determine into how many tiles we need to split the // computation. clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(render->deviceMaxAlloc), &(render->deviceMaxAlloc), NULL); printf("---OpenCL: Selected device has CL_DEVICE_MAX_MEM_ALLOC_SIZE: %lfMB\n", render->deviceMaxAlloc/1024.0/1024.0); // create a command queue render->queue = clCreateCommandQueue(render->contextCL, device, 0, &err); CheckOpenCLError(err, __LINE__); //create the program with the source above // printf("Creating CL Program...\n"); *program = clCreateProgramWithSource(render->contextCL, 1, (const char**)&kernelSource, NULL, &err); if (err != CL_SUCCESS) { printf("Error in clCreateProgramWithSource: %d, line %d.\n", err, __LINE__); return EXIT_FAILURE; } //build program executable err = clBuildProgram(*program, 0, NULL, "-I. -I src/", NULL, NULL); if (err != CL_SUCCESS) { printf("Error in clBuildProgram: %d, line %d.\n", err, __LINE__); char buffer[5000]; clGetProgramBuildInfo(*program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL); printf("%s\n", buffer); return EXIT_FAILURE; } // dump ptx size_t binSize; clGetProgramInfo(*program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binSize, NULL); unsigned char *bin = malloc(binSize); clGetProgramInfo(*program, CL_PROGRAM_BINARIES, sizeof(unsigned char *), &bin, NULL); FILE *fp = fopen("openclPTX.ptx", "wb"); fwrite(bin, sizeof(char), binSize, fp); fclose(fp); free(bin); free(numDevices); free(kernelSource); printf("\n"); return EXIT_SUCCESS; }
int main(void) { printf("\n" "Controls: - Left/Right Click to zoom in/out, centring on cursor position.\n" " - Left Click and Drag to pan.\n" " - r to reset view.\n" " - q,w to decrease, increase max iteration count\n" " - a,s to decrease, increase colour period\n" " - g to toggle Gaussian Blur after computation\n" " - b to run some benchmarks.\n" " - p to show a double-precision limited zoom.\n" " - h to save a high resolution image of the current view to current directory.\n" " - Esc to quit.\n\n"); // Set render function, dependent on compile time flag. All have the same signature, // with all necessary variables defined inside the structs. #ifdef WITHOPENCL RenderMandelbrotPtr RenderMandelbrot = &RenderMandelbrotOpenCL; #elif defined(WITHAVX) RenderMandelbrotPtr RenderMandelbrot = &RenderMandelbrotAVXCPU; // AVX double prec vector width (4) must divide horizontal (x) resolution assert(XRESOLUTION % 4 == 0); #elif defined(WITHGMP) RenderMandelbrotPtr RenderMandelbrot = &RenderMandelbrotGMPCPU; #else RenderMandelbrotPtr RenderMandelbrot = &RenderMandelbrotCPU; #endif // Define and initialize structs imageStruct image; renderStruct render; // Set image resolution image.xRes = XRESOLUTION; image.yRes = YRESOLUTION; // Initial values for boundaries, iteration count SetInitialValues(&image); // Update OpenGL texture on render. This is disabled when rendering high resolution images render.updateTex = 1; // Allocate host memory, used to set up OpenGL texture, even if we are using interop OpenCL image.pixels = malloc(image.xRes * image.yRes * sizeof *(image.pixels) *3); // OpenGL variables and setup render.window = NULL; GLuint vertexShader, fragmentShader, shaderProgram; GLuint vao, vbo, ebo, tex; SetUpOpenGL(&(render.window), image.xRes, image.yRes, &vertexShader, &fragmentShader, &shaderProgram, &vao, &vbo, &ebo, &tex); #ifdef WITHOPENCL // OpenCL variables and setup cl_platform_id *platform; cl_device_id **device_id; cl_program program; cl_int err; render.globalSize = image.xRes * image.yRes; render.localSize = OPENCLLOCALSIZE; assert(render.globalSize % render.localSize == 0); // Initially set variable that controls interop of OpenGL and OpenCL to 0, set to 1 if // interop device found successfully render.glclInterop = 0; if (InitialiseCLEnvironment(&platform, &device_id, &program, &render) == EXIT_FAILURE) { printf("Error initialising OpenCL environment\n"); return EXIT_FAILURE; } size_t sizeBytes = image.xRes * image.yRes * 3 * sizeof(float); render.pixelsDevice = clCreateBuffer(render.contextCL, CL_MEM_READ_WRITE, sizeBytes, NULL, &err); // if we aren't using interop, allocate another buffer on the device for output, on the pointer // for the texture if (render.glclInterop == 0) { render.pixelsTex = clCreateBuffer(render.contextCL, CL_MEM_READ_WRITE, sizeBytes, NULL, &err); } // finish texture initialization so that we can use with OpenCL if glclInterop glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, image.xRes, image.yRes, 0, GL_RGB, GL_FLOAT, image.pixels); // Configure image from OpenGL texture "tex" if (render.glclInterop) { render.pixelsTex = clCreateFromGLTexture(render.contextCL, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, tex, &err); CheckOpenCLError(err, __LINE__); } // Create kernels render.renderMandelbrotKernel = clCreateKernel(program, "renderMandelbrotKernel", &err); CheckOpenCLError(err, __LINE__); render.gaussianBlurKernel = clCreateKernel(program, "gaussianBlurKernel", &err); CheckOpenCLError(err, __LINE__); render.gaussianBlurKernel2 = clCreateKernel(program, "gaussianBlurKernel2", &err); CheckOpenCLError(err, __LINE__); #endif // Start main loop: Update until we encounter user input. Look for Esc key (quit), left and right mount // buttons (zoom in on cursor position, zoom out on cursor position), "r" -- reset back to initial coords, // "b" -- run some benchmarks, "p" -- display a double precision limited zoom. // Re-render the Mandelbrot set as and when we need, in the user input conditionals. // Initial render: RenderMandelbrot(&render, &image); while (!glfwWindowShouldClose(render.window)) { // draw glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0); // Swap buffers glfwSwapBuffers(render.window); // USER INPUT TESTS. // Continuous render, poll for input: //glfwPollEvents(); // Render only on update, wait for input: glfwWaitEvents(); // if user presses Esc, close window to leave loop if (glfwGetKey(render.window, GLFW_KEY_ESCAPE) == GLFW_PRESS) { glfwSetWindowShouldClose(render.window, GL_TRUE); } // if user left-clicks in window, zoom in, centring on cursor position // if click and drag, simply re-position centre without zooming else if (glfwGetMouseButton(render.window, GLFW_MOUSE_BUTTON_LEFT) == GLFW_PRESS) { // Get Press cursor location double xPressPos, yPressPos, xReleasePos, yReleasePos; int shift = 0; glfwGetCursorPos(render.window, &xPressPos, &yPressPos); // Wait for mousebutton release, re-rendering as mouse moves while (glfwGetMouseButton(render.window, GLFW_MOUSE_BUTTON_LEFT) != GLFW_RELEASE) { glfwGetCursorPos(render.window, &xReleasePos, &yReleasePos); if (fabs(xReleasePos-xPressPos) > DRAGPIXELS || fabs(yReleasePos-yPressPos) > DRAGPIXELS) { // Set shift variable. Don't zoom after button release if this is 1 shift = 1; // Determine shift in mandelbrot coords double xShift = (xReleasePos-xPressPos)/(double)image.xRes*(image.xMax-image.xMin); double yShift = (yReleasePos-yPressPos)/(double)image.yRes*(image.yMax-image.yMin); // Add shift to boundaries image.xMin = image.xMin - xShift; image.xMax = image.xMax - xShift; image.yMin = image.yMin - yShift; image.yMax = image.yMax - yShift; // Update "current" (press) position xPressPos = xReleasePos; yPressPos = yReleasePos; // Re-render and draw RenderMandelbrot(&render, &image); glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0); glfwSwapBuffers(render.window); } glfwPollEvents(); } // else, zoom in smoothly over ZOOMSTEPS frames if (!shift) { SmoothZoom(&render, &image, RenderMandelbrot, xReleasePos, yReleasePos, ZOOMFACTOR, ITERSFACTOR); } } // if user right-clicks in window, zoom out, centring on cursor position else if (glfwGetMouseButton(render.window, GLFW_MOUSE_BUTTON_RIGHT) == GLFW_PRESS) { while (glfwGetMouseButton(render.window, GLFW_MOUSE_BUTTON_RIGHT) != GLFW_RELEASE) { glfwPollEvents(); } // Get cursor position, in *screen coordinates* double xReleasePos, yReleasePos; glfwGetCursorPos(render.window, &xReleasePos, &yReleasePos); // Zooming out, so use 1/FACTORs. SmoothZoom(&render, &image, RenderMandelbrot, xReleasePos, yReleasePos, 1.0/ZOOMFACTOR, 1.0/ITERSFACTOR); } // if user presses "r", reset view else if (glfwGetKey(render.window, GLFW_KEY_R) == GLFW_PRESS) { while (glfwGetKey(render.window, GLFW_KEY_R) != GLFW_RELEASE) { glfwPollEvents(); } printf("Resetting...\n"); SetInitialValues(&image); RenderMandelbrot(&render, &image); } // if user presses "g", toggle gaussian blur else if (glfwGetKey(render.window, GLFW_KEY_G) == GLFW_PRESS) { while (glfwGetKey(render.window, GLFW_KEY_G) != GLFW_RELEASE) { glfwPollEvents(); } if (image.gaussianBlur == 1) { printf("Toggling Gaussian Blur Off...\n"); image.gaussianBlur = 0; } else { printf("Toggling Gaussian Blur On...\n"); image.gaussianBlur = 1; } RenderMandelbrot(&render, &image); } // if user presses "q", decrease max iteration count else if (glfwGetKey(render.window, GLFW_KEY_Q) == GLFW_PRESS) { while (glfwGetKey(render.window, GLFW_KEY_Q) != GLFW_RELEASE) { glfwPollEvents(); } printf("Decreasing max iteration count from %d to %d\n", image.maxIters, (int)(image.maxIters/ITERSFACTOR)); image.maxIters /= ITERSFACTOR; RenderMandelbrot(&render, &image); } // if user presses "w", increase max iteration count else if (glfwGetKey(render.window, GLFW_KEY_W) == GLFW_PRESS) { while (glfwGetKey(render.window, GLFW_KEY_W) != GLFW_RELEASE) { glfwPollEvents(); } printf("Increasing max iteration count from %d to %d\n", image.maxIters, (int)(image.maxIters*ITERSFACTOR)); image.maxIters *= ITERSFACTOR; RenderMandelbrot(&render, &image); } // if user presses "a", decrease colour period else if (glfwGetKey(render.window, GLFW_KEY_A) == GLFW_PRESS) { while (glfwGetKey(render.window, GLFW_KEY_A) != GLFW_RELEASE) { glfwPollEvents(); } printf("Decreasing colour period from %.0lf to %.0lf\n", image.colourPeriod, fmax(32, image.colourPeriod-32)); image.colourPeriod = fmax(32, image.colourPeriod-32); RenderMandelbrot(&render, &image); } // if user presses "s", increase colour period else if (glfwGetKey(render.window, GLFW_KEY_S) == GLFW_PRESS) { while (glfwGetKey(render.window, GLFW_KEY_S) != GLFW_RELEASE) { glfwPollEvents(); } printf("Increasing colour period from %.0lf to %.0lf\n", image.colourPeriod, image.colourPeriod+32); image.colourPeriod += 32; RenderMandelbrot(&render, &image); } // if user presses "b", run some benchmarks. else if (glfwGetKey(render.window, GLFW_KEY_B) == GLFW_PRESS) { while (glfwGetKey(render.window, GLFW_KEY_B) != GLFW_RELEASE) { glfwPollEvents(); } printf("Running Benchmarks...\n"); printf("Whole fractal:\n"); SetInitialValues(&image); RunBenchmark(&render, &image, RenderMandelbrot); printf("Early Bail-out:\n"); image.xMin = -0.8153143016681144; image.xMax = -0.6839170011300622; image.yMin = -0.0365167077914237; image.yMax = 0.0373942737612310; image.maxIters = 112; RunBenchmark(&render, &image, RenderMandelbrot); printf("Spiral:\n"); image.xMin = -0.8673755781976442; image.xMax = -0.8673711898931797; image.yMin = -0.2156059883952151; image.yMax = -0.2156035199739536; image.maxIters = 1757; RunBenchmark(&render, &image, RenderMandelbrot); printf("Highly zoomed:\n"); image.xMin = -0.8712903154956539; image.xMax = -0.8712903108993595; image.yMin = -0.2293516610223087; image.yMax = -0.2293516584368930; image.maxIters = 10750; RunBenchmark(&render, &image, RenderMandelbrot); printf("Complete.\n"); // Re-render with original coords SetInitialValues(&image); RenderMandelbrot(&render, &image); } // if user presses "p", zoom in, such that the double precision algorithm looks pixellated else if (glfwGetKey(render.window, GLFW_KEY_P) == GLFW_PRESS) { while (glfwGetKey(render.window, GLFW_KEY_P) != GLFW_RELEASE) { glfwPollEvents(); } printf("Precision test...\n"); image.xMin = -1.25334325335487362; image.xMax = -1.25334325335481678; image.yMin = -0.34446232396119353; image.yMax = -0.34446232396116155; image.maxIters = 1389952; RenderMandelbrot(&render, &image); } // if user presses "h", render a high resolution version of the current view, and // save it to disk as an image else if (glfwGetKey(render.window, GLFW_KEY_H) == GLFW_PRESS) { while (glfwGetKey(render.window, GLFW_KEY_H) != GLFW_RELEASE) { glfwPollEvents(); } double startTime = GetWallTime(); printf("Saving high resolution (%d x %d) image...\n", image.xRes*HIGHRESOLUTIONMULTIPLIER, image.yRes*HIGHRESOLUTIONMULTIPLIER); HighResolutionRender(&render, &image, RenderMandelbrot); printf(" --- done. Total time: %lfs\n", GetWallTime()-startTime); } } // clean up #ifdef WITHOPENCL CleanUpCLEnvironment(&platform, &device_id, &(render.contextCL), &(render.queue), &program); #endif glDeleteProgram(shaderProgram); glDeleteShader(fragmentShader); glDeleteShader(vertexShader); glDeleteBuffers(1, &ebo); glDeleteBuffers(1, &vbo); glDeleteVertexArrays(1, &vao); // Close OpenGL window and terminate GLFW glfwDestroyWindow(render.window); glfwTerminate(); // Free dynamically allocated memory free(image.pixels); return 0; }
void HighResolutionRender(renderStruct *render, imageStruct *image, RenderMandelbrotPtr RenderMandelbrot) { // Set new resolution image->xRes = image->xRes*HIGHRESOLUTIONMULTIPLIER; image->yRes = image->yRes*HIGHRESOLUTIONMULTIPLIER; //Set updateTex flag to 0 so we don't try to draw it on screen. render->updateTex = 0; // Allocate new host pixels array of larger size. We need this regardless of interop free(image->pixels); // CAREFUL: these sizes can easily overflow a 32bit int. Use uint64_t uint64_t allocSize = image->xRes * image->yRes * sizeof*(image->pixels) *3; printf(" --- reallocating host pixels array: %.2lfMB\n", allocSize/1024.0/1024.0); image->pixels = malloc(allocSize); #ifdef WITHOPENCL // Here, it is likely that the array(s) of colour values will not fit in device global memory. // We have to render the frame in tiles, each of which fits. // // OpenCL platform gives us a max allocation, which is "at least" 1/4 of the memory size. We // can't therefore, allocate two arrays of the max allocation -- they are not guaranteed to fit. // We allocate two arrays, each half the max allocation. cl_int err; // Determine the size (in bytes) of one row of pixels uint64_t rowSize = image->xRes * sizeof *(image->pixels) * 3; // Determine how many rows we can fit in half the max allocation uint64_t maxAllocRows = (render->deviceMaxAlloc/2) / rowSize; // The number of tiles required to render the frame: int tiles = (int)ceil((double)image->yRes/(double)maxAllocRows); // Size of a single tile uint64_t fullTileSize = maxAllocRows * rowSize; // Allocate tile-sized global memory arrays on device. // Store handle to OpenGL texture so it can be recovered later. // The struct variable will be reassigned, this makes running the kernels simpler. cl_mem keepPixelsTex = render->pixelsTex; // Release the existing pixels array err = clReleaseMemObject(render->pixelsDevice); CheckOpenCLError(err, __LINE__); // Allocate new arrays printf(" --- allocating tile arrays on device: %lfMB\n", 2.0*fullTileSize/1024.0/1024.0); render->pixelsDevice = clCreateBuffer(render->contextCL, CL_MEM_READ_WRITE, fullTileSize, NULL, &err); CheckOpenCLError(err, __LINE__); render->pixelsTex = clCreateBuffer(render->contextCL, CL_MEM_READ_WRITE, fullTileSize, NULL, &err); CheckOpenCLError(err, __LINE__); // Store full image resolution and boundaries, we will adjust the values in the struct int yResFull = image->yRes; double yMinFull = image->yMin; double yMaxFull = image->yMax; // Render tiles, and copy data back to the host array for (int t = 0; t < tiles; t++) { printf(" --- computing tile %d/%d...\n", t+1, tiles); // Set tile resolution. Each has maxAllocRows apart from the last, // which might have fewer as it contains the remainder. image->yRes = maxAllocRows; if (t == tiles-1) { image->yRes = yResFull % maxAllocRows; } // Reset global size, as the resolution has changed render->globalSize = image->yRes * image->xRes; assert(render->globalSize % render->localSize == 0); // Set tile boundaries. We are computing a fraction maxAllocRows/yResFull of the full //image, starting at the beginning of the t-th tile. The final tile uses xMaxFull for xMax. image->yMin = yMinFull + t*((double)maxAllocRows/(double)yResFull)*(yMaxFull-yMinFull); image->yMax = yMinFull + (t+1)*((double)maxAllocRows/(double)yResFull)*(yMaxFull-yMinFull); if (t == tiles-1) { image->yMax = yMaxFull; } // Render RenderMandelbrot(render, image); // Copy data from render->pixelsTex (the output of GaussianBlurKernel2) uint64_t storeOffset = t * (maxAllocRows * image->xRes * 3); // The final tile is smaller if (t == tiles-1) { fullTileSize = image->yRes * image->xRes * sizeof *(image->pixels) * 3; } err = clEnqueueReadBuffer(render->queue, render->pixelsTex, CL_TRUE, 0, fullTileSize, &(image->pixels[storeOffset]), 0, NULL, NULL); CheckOpenCLError(err, __LINE__); } // Reset image boundaries and resolution image->yRes = yResFull; image->yMin = yMinFull; image->yMax = yMaxFull; #else // If not using OpenCL, simply render directly onto the reallocated image->pixels array RenderMandelbrot(render, image); #endif // Save png printf(" --- creating png...\n"); // Create raw pixel array GLubyte * rawPixels; uint64_t rawAllocSize = image->xRes * image->yRes * sizeof *rawPixels *3; rawPixels = malloc(rawAllocSize); for (uint64_t i = 0; i < image->xRes*image->yRes*3; i+=3) { // note change in order, rgb -> bgr! rawPixels[i+0] = (GLubyte)((image->pixels)[i+2]*255); rawPixels[i+1] = (GLubyte)((image->pixels)[i+1]*255); rawPixels[i+2] = (GLubyte)((image->pixels)[i+0]*255); } FIBITMAP* img = FreeImage_ConvertFromRawBits(rawPixels, image->xRes, image->yRes, 3*image->xRes, 24, 0x000000, 0x000000, 0x000000, TRUE); FreeImage_Save(FIF_PNG, img, "test.png", 0); FreeImage_Unload(img); free(rawPixels); // Reset original values to continue with live rendering image->xRes = image->xRes/HIGHRESOLUTIONMULTIPLIER; image->yRes = image->yRes/HIGHRESOLUTIONMULTIPLIER; render->updateTex = 1; free(image->pixels); image->pixels = malloc(image->xRes * image->yRes * sizeof *(image->pixels) *3); #ifdef WITHOPENCL // Release large global memory buffers clReleaseMemObject(render->pixelsDevice); clReleaseMemObject(render->pixelsTex); // Recover handle to OpenGL texture render->pixelsTex = keepPixelsTex; size_t allocSizeOrig = image->xRes * image->yRes * 3 * sizeof *(image->pixels); render->pixelsDevice = clCreateBuffer(render->contextCL, CL_MEM_READ_WRITE, allocSizeOrig, NULL, &err); CheckOpenCLError(err, __LINE__); // Reset global size, as the resolution has changed render->globalSize = image->yRes * image->xRes; assert(render->globalSize % render->localSize == 0); #endif }
void PGR_radiosity::releaseCL() { /* Releases OpenCL resources (Context, Memory etc.) */ cl_int status; status = clReleaseKernel(this->sortKernel); CheckOpenCLError(status, "clReleaseKernel sortKernel."); status = clReleaseKernel(this->radiosityKernel); CheckOpenCLError(status, "clReleaseKernel radiosityKernel."); status = clReleaseMemObject(this->indicesCL); CheckOpenCLError(status, "clReleaseMemObject indicesCL"); status = clReleaseMemObject(this->patchesColorsCL); CheckOpenCLError(status, "clReleaseMemObject patchesCL"); status = clReleaseMemObject(this->patchesGeoCL); CheckOpenCLError(status, "clReleaseMemObject patchesGeometryCL"); status = clReleaseMemObject(this->indicesCountCL); CheckOpenCLError(status, "clReleaseMemObject indicesCountCL"); status = clReleaseMemObject(this->patchesEnergiesCL); CheckOpenCLError(status, "clReleaseMemObject patchesEnergiesCL"); status = clReleaseMemObject(this->maximalEnergyCL); CheckOpenCLError(status, "clReleaseMemObject maximalEnergyCL"); status = clReleaseMemObject(this->diffColorsCL); CheckOpenCLError(status, "clReleaseMemObject diffColorsCL"); status = clReleaseMemObject(this->intensitiesCL); CheckOpenCLError(status, "clReleaseMemObject intensitiesCL"); status = clReleaseMemObject(this->texturesCL); CheckOpenCLError(status, "clReleaseMemObject texturesCL"); status = clReleaseProgram(this->program); CheckOpenCLError(status, "clReleaseProgram."); status = clReleaseCommandQueue(this->queue); CheckOpenCLError(status, "clReleaseCommandQueue."); status = clReleaseContext(this->context); CheckOpenCLError(status, "clReleaseContext."); delete [] this->raw_patchesColors; delete [] this->raw_patchesGeo; delete [] this->raw_patchesEnergies; delete [] this->raw_intensities; delete [] this->raw_diffColors; delete [] this->raw_textures; }
void PGR_radiosity::runRadiosityKernelCL() { int status; int cycles = 0; /* Events */ cl_event event_radiosity, event_sort, event_maximalEnergy, event_indices, event_textures, event_indicesCount; float maximalEnergy; maximalEnergy = (float) this->model->getMaximalEnergy(); /* Setup arguments to the radiosity kernel */ status = clSetKernelArg(this->radiosityKernel, 0, sizeof (cl_mem), &this->patchesGeoCL); CheckOpenCLError(status, "clSetKernelArg. (patchesCL)"); status = clSetKernelArg(this->radiosityKernel, 1, sizeof (cl_mem), &this->patchesColorsCL); CheckOpenCLError(status, "clSetKernelArg. (patchesCL)"); cl_uint patchesCount = (cl_uint)this->model->getPatchesCount(); status = clSetKernelArg(this->radiosityKernel, 2, sizeof (cl_uint), &patchesCount); CheckOpenCLError(status, "clSetKernelArg. (patchesCount)"); /* Set initial energy */ this->raw_indices = new cl_uint[this->workGroupSize]; cl_uint indicesCount = this->model->getIdsOfNMostEnergizedPatchesCL(this->raw_indices, this->workGroupSize, LIMIT); status = clEnqueueWriteBuffer(this->queue, this->indicesCountCL, CL_TRUE, //blocking write 0, sizeof (cl_uint), &indicesCount, 0, 0, 0); CheckOpenCLError(status, "Copy indicesCount to GPU"); status = clEnqueueWriteBuffer(this->queue, this->indicesCL, CL_TRUE, //blocking write 0, indicesCount * sizeof (cl_uint), this->raw_indices, 0, 0, 0); CheckOpenCLError(status, "Copy indices to GPU"); status = clSetKernelArg(this->radiosityKernel, 3, sizeof (cl_mem), &this->indicesCL); CheckOpenCLError(status, "clSetKernelArg. (indicesCL)"); status = clSetKernelArg(this->radiosityKernel, 4, sizeof (cl_mem), &this->indicesCountCL); CheckOpenCLError(status, "clSetKernelArg. (indicesCount)"); status = clSetKernelArg(this->radiosityKernel, 5, sizeof (cl_mem), &this->patchesEnergiesCL); CheckOpenCLError(status, "clSetKernelArg. (patchesCL)"); status = clSetKernelArg(this->radiosityKernel, 6, sizeof (cl_mem), &this->diffColorsCL); CheckOpenCLError(status, "clSetKernelArg. (diffColorsCL)"); status = clSetKernelArg(this->radiosityKernel, 7, sizeof (cl_mem), &this->intensitiesCL); CheckOpenCLError(status, "clSetKernelArg. (intensitiesCL)"); this->raw_textures = new cl_uchar3[768 * 256 * this->workGroupSize]; this->model->getTextureCL(this->raw_textures, this->raw_indices, indicesCount); status = clEnqueueWriteBuffer(this->queue, this->texturesCL, CL_TRUE, //blocking write 0, 768 * 256 * indicesCount * sizeof (cl_uchar3), this->raw_textures, 0, 0, 0); CheckOpenCLError(status, "Copy textures to GPU"); status = clSetKernelArg(this->radiosityKernel, 8, sizeof (cl_mem), &this->texturesCL); CheckOpenCLError(status, "clSetKernelArg. (texturesCL)"); status = clSetKernelArg(this->radiosityKernel, 9, sizeof (cl_mem), &this->visitedCL); CheckOpenCLError(status, "clSetKernelArg. (visitedCL)"); size_t globalThreadsMain[] = {this->workGroupSize}; size_t localThreadsMain[] = {this->workGroupSize}; /* Set arguments for sort kernel */ status = clSetKernelArg(this->sortKernel, 0, sizeof (cl_mem), &this->patchesEnergiesCL); CheckOpenCLError(status, "clSetKernelArg. (patchesCL)"); status = clSetKernelArg(this->sortKernel, 1, sizeof (cl_uint), &patchesCount); CheckOpenCLError(status, "clSetKernelArg. (patchesCount)"); status = clSetKernelArg(this->sortKernel, 2, sizeof (cl_mem), &this->indicesCL); CheckOpenCLError(status, "clSetKernelArg. (indicesCL)"); status = clSetKernelArg(this->sortKernel, 3, sizeof (cl_mem), &this->indicesCountCL); CheckOpenCLError(status, "clSetKernelArg. (indicesCount)"); status = clSetKernelArg(this->sortKernel, 4, sizeof (cl_uint), &this->workGroupSize); CheckOpenCLError(status, "clSetKernelArg. (n)"); float limit = LIMIT; status = clSetKernelArg(this->sortKernel, 5, sizeof (cl_float), &limit); CheckOpenCLError(status, "clSetKernelArg. (limit)"); status = clSetKernelArg(this->sortKernel, 6, sizeof (cl_mem), &this->maximalEnergyCL); CheckOpenCLError(status, "clSetKernelArg. (maximalEnergy)"); size_t globalThreadsSort[] = {1}; //only one kernel computes size_t localThreadsSort[] = {1}; cl_bool* zeroVisited = new cl_bool[this->maxWorkGroupSize * this->model->getPatchesCount()]; memset(zeroVisited, 0, this->maxWorkGroupSize * this->model->getPatchesCount() * sizeof (cl_bool)); debug_log = false; while (maximalEnergy > LIMIT) { cout << cycles << " energy: " << maximalEnergy << endl; cycles++; /* Start kernel - radiosity step*/ status = clEnqueueNDRangeKernel(this->queue, this->radiosityKernel, 1, //1D NULL, //offset globalThreadsMain, localThreadsMain, 0, NULL, &event_radiosity); CheckOpenCLError(status, "clEnqueueNDRangeKernel radiosityKernel."); /* Start kernel - recompute indices array */ status = clEnqueueNDRangeKernel(this->queue, this->sortKernel, 1, //1D NULL, //offset globalThreadsSort, localThreadsSort, 1, &event_radiosity, &event_sort); CheckOpenCLError(status, "clEnqueueNDRangeKernel sortKernel."); /* Read maximal energy from buffer */ status = clEnqueueReadBuffer(this->queue, this->maximalEnergyCL, CL_TRUE, //blocking write 0, sizeof (cl_float), &maximalEnergy, 1, &event_sort, &event_maximalEnergy); CheckOpenCLError(status, "Read maximal energy"); status = clEnqueueReadBuffer(this->queue, this->indicesCountCL, CL_TRUE, //blocking write 0, sizeof (cl_uint), &indicesCount, 1, &event_sort, &event_indicesCount); CheckOpenCLError(status, "Read indices count"); status = clWaitForEvents(1, &event_indicesCount); CheckOpenCLError(status, "clWaitForEvents read indices count."); if (indicesCount == 0) break; status = clEnqueueReadBuffer(this->queue, this->indicesCL, CL_TRUE, //blocking write 0, indicesCount * sizeof (cl_uint), this->raw_indices, 0, NULL, &event_indices); CheckOpenCLError(status, "Read indices"); status = clWaitForEvents(1, &event_indices); CheckOpenCLError(status, "clWaitForEvents read Indices"); this->model->getTextureCL(this->raw_textures, this->raw_indices, indicesCount); status = clEnqueueWriteBuffer(this->queue, this->texturesCL, CL_TRUE, //blocking write 0, 768 * 256 * indicesCount * sizeof (cl_uchar3), this->raw_textures, 0, NULL, &event_textures); CheckOpenCLError(status, "Copy textures to GPU"); status = clWaitForEvents(1, &event_textures); CheckOpenCLError(status, "clWaitForEvents write textures."); status = clEnqueueWriteBuffer(this->queue, this->visitedCL, CL_TRUE, //blocking write 0, this->model->getPatchesCount() * sizeof (cl_bool), zeroVisited, 0, 0, 0); CheckOpenCLError(status, "Clear visited flags"); status = clWaitForEvents(1, &event_maximalEnergy); CheckOpenCLError(status, "clWaitForEvents read Maximal energy."); } delete [] zeroVisited; cout << "cycles: " << cycles << endl; debug_log = true; }
int PGR_radiosity::prepareCL() { cl_int ciErr = CL_SUCCESS; // Get Platform cl_platform_id *cpPlatforms; cl_uint cuiPlatformsCount; ciErr = clGetPlatformIDs(0, NULL, &cuiPlatformsCount); this->CheckOpenCLError(ciErr, "clGetPlatformIDs: cuiPlatformsNum=%i", cuiPlatformsCount); cpPlatforms = (cl_platform_id*) malloc(cuiPlatformsCount * sizeof (cl_platform_id)); ciErr = clGetPlatformIDs(cuiPlatformsCount, cpPlatforms, NULL); this->CheckOpenCLError(ciErr, "clGetPlatformIDs"); cl_platform_id platform = 0; const unsigned int TMP_BUFFER_SIZE = 1024; char sTmp[TMP_BUFFER_SIZE]; for (unsigned int f0 = 0; f0 < cuiPlatformsCount; f0++) { //bool shouldBrake = false; ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_PROFILE, TMP_BUFFER_SIZE, sTmp, NULL); this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_PROFILE=%s", f0, sTmp); ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_VERSION, TMP_BUFFER_SIZE, sTmp, NULL); this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_VERSION=%s", f0, sTmp); ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_NAME, TMP_BUFFER_SIZE, sTmp, NULL); this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_NAME=%s", f0, sTmp); ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_VENDOR, TMP_BUFFER_SIZE, sTmp, NULL); this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_VENDOR=%s", f0, sTmp); //prioritize AMD and CUDA platforms if ((strcmp(sTmp, "NVIDIA Corporation") == 0)) { platform = cpPlatforms[f0]; } // if ((strcmp(sTmp, "Advanced Micro Devices, Inc.") == 0)) // { // platform = cpPlatforms[f0]; // } //prioritize Intel /*if ((strcmp(sTmp, "Intel(R) Corporation") == 0)) { platform = cpPlatforms[f0]; }*/ ciErr = clGetPlatformInfo(cpPlatforms[f0], CL_PLATFORM_EXTENSIONS, TMP_BUFFER_SIZE, sTmp, NULL); this->CheckOpenCLError(ciErr, "clGetPlatformInfo: Id=%i: CL_PLATFORM_EXTENSIONS=%s", f0, sTmp); } if (platform == 0) { //no prioritized found if (cuiPlatformsCount > 0) { platform = cpPlatforms[0]; } else { cerr << "No device was found" << endl; return -1; } } // Get Devices cl_uint cuiDevicesCount; ciErr = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &cuiDevicesCount); CheckOpenCLError(ciErr, "clGetDeviceIDs: cuiDevicesCount=%i", cuiDevicesCount); cl_device_id *cdDevices = (cl_device_id*) malloc(cuiDevicesCount * sizeof (cl_device_id)); ciErr = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, cuiDevicesCount, cdDevices, NULL); CheckOpenCLError(ciErr, "clGetDeviceIDs"); unsigned int deviceIndex = 0; for (unsigned int f0 = 0; f0 < cuiDevicesCount; f0++) { cl_device_type cdtTmp; size_t iDim[3]; ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_TYPE, sizeof (cdtTmp), &cdtTmp, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_TYPE=%s%s%s%s", f0, cdtTmp & CL_DEVICE_TYPE_CPU ? "CPU," : "", cdtTmp & CL_DEVICE_TYPE_GPU ? "GPU," : "", cdtTmp & CL_DEVICE_TYPE_ACCELERATOR ? "ACCELERATOR," : "", cdtTmp & CL_DEVICE_TYPE_DEFAULT ? "DEFAULT," : ""); if (cdtTmp & CL_DEVICE_TYPE_GPU) { //prioritize gpu if both cpu and gpu are available deviceIndex = f0; } cl_bool bTmp; ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_AVAILABLE, sizeof (bTmp), &bTmp, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_AVAILABLE=%s", f0, bTmp ? "YES" : "NO"); ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_NAME, TMP_BUFFER_SIZE, sTmp, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_NAME=%s", f0, sTmp); ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_VENDOR, TMP_BUFFER_SIZE, sTmp, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_VENDOR=%s", f0, sTmp); ciErr = clGetDeviceInfo(cdDevices[f0], CL_DRIVER_VERSION, TMP_BUFFER_SIZE, sTmp, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DRIVER_VERSION=%s", f0, sTmp); ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_PROFILE, TMP_BUFFER_SIZE, sTmp, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_PROFILE=%s", f0, sTmp); ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_VERSION, TMP_BUFFER_SIZE, sTmp, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_VERSION=%s", f0, sTmp); ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof (iDim), iDim, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_MAX_WORK_ITEM_SIZES=%ix%ix%i", f0, iDim[0], iDim[1], iDim[2]); ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof (size_t), iDim, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_MAX_WORK_GROUP_SIZE=%i", f0, iDim[0]); ciErr = clGetDeviceInfo(cdDevices[f0], CL_DEVICE_EXTENSIONS, TMP_BUFFER_SIZE, sTmp, NULL); CheckOpenCLError(ciErr, "clGetDeviceInfo: Id=%i: CL_DEVICE_EXTENSIONS=%s", f0, sTmp); } cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platform, 0}; /* Create context */ this->context = clCreateContext(cps, 1, &cdDevices[deviceIndex], NULL, NULL, &ciErr); CheckOpenCLError(ciErr, "clCreateContext"); /* Create a command queue */ this->queue = clCreateCommandQueue(this->context, cdDevices[deviceIndex], CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &ciErr); CheckOpenCLError(ciErr, "clCreateCommandQueue"); /* Create and compile and openCL program */ char *cSourceCL = loadProgSource("kernels.cl"); this->program = clCreateProgramWithSource(this->context, 1, (const char **) &cSourceCL, NULL, &ciErr); CheckOpenCLError(ciErr, "clCreateProgramWithSource"); free(cSourceCL); ciErr = clBuildProgram(this->program, 0, NULL, NULL, NULL, NULL); CheckOpenCLError(ciErr, "clBuildProgram"); cl_int logStatus; char *buildLog = NULL; size_t buildLogSize = 0; logStatus = clGetProgramBuildInfo(this->program, cdDevices[deviceIndex], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, &buildLogSize); CheckOpenCLError(logStatus, "clGetProgramBuildInfo."); buildLog = (char*) malloc(buildLogSize); if (buildLog == NULL) { printf("Failed to allocate host memory. (buildLog)"); return -1; } memset(buildLog, 0, buildLogSize); logStatus = clGetProgramBuildInfo(this->program, cdDevices[deviceIndex], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL); CheckOpenCLError(logStatus, "clGetProgramBuildInfo."); free(buildLog); size_t tempKernelWorkGroupSize; /* Create kernels */ this->radiosityKernel = clCreateKernel(program, "radiosity", &ciErr); CheckOpenCLError(ciErr, "clCreateKernel radiosity"); this->sortKernel = clCreateKernel(program, "sort", &ciErr); CheckOpenCLError(ciErr, "clCreateKernel sort"); this->maxWorkGroupSize = 64; this->workGroupSize = 64; ciErr = clGetKernelWorkGroupInfo(this->radiosityKernel, cdDevices[deviceIndex], CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &tempKernelWorkGroupSize, 0); CheckOpenCLError(ciErr, "clGetKernelInfo"); this->maxWorkGroupSize = MIN(tempKernelWorkGroupSize, this->maxWorkGroupSize); if (this->workGroupSize > this->maxWorkGroupSize) { cout << "Out of Resources!" << endl; cout << "Group Size specified: " << this->workGroupSize << endl; cout << "Max Group Size supported on the kernel: " << this->maxWorkGroupSize << endl; cout << "Falling back to " << this->maxWorkGroupSize << endl; this->workGroupSize = this->maxWorkGroupSize; } /* Allocate buffer of colors */ this->patchesColorsCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->model->getPatchesCount() * sizeof (cl_uchar3), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer patchesCL"); this->raw_patchesColors = new cl_uchar3[this->model->getPatchesCount()]; this->raw_patchesEnergies = new cl_float[this->model->getPatchesCount()]; this->raw_diffColors = new cl_uchar3[this->model->getPatchesCount()]; this->raw_intensities = new cl_float[this->model->getPatchesCount()]; this->model->getPatchesCL(this->raw_patchesColors, this->raw_patchesEnergies); ciErr = clEnqueueWriteBuffer(this->queue, this->patchesColorsCL, CL_TRUE, //blocking write 0, this->model->getPatchesCount() * sizeof (cl_uchar3), this->raw_patchesColors, 0, 0, 0); CheckOpenCLError(ciErr, "Copy patches colors"); /* Alocate buffer of energies */ this->patchesEnergiesCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->model->getPatchesCount() * sizeof (cl_float), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer patchesCL"); ciErr = clEnqueueWriteBuffer(this->queue, this->patchesEnergiesCL, CL_TRUE, //blocking write 0, this->model->getPatchesCount() * sizeof (cl_float), this->raw_patchesEnergies, 0, 0, 0); CheckOpenCLError(ciErr, "Copy patches"); /* Allocate buffer of patches geometry */ this->patchesGeoCL = clCreateBuffer(this->context, CL_MEM_READ_ONLY, this->model->getPatchesCount() * sizeof (cl_float8), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer patchesGeometryCL"); this->raw_patchesGeo = new cl_float8[this->model->getPatchesCount()]; this->model->getPatchesGeometryCL(raw_patchesGeo); ciErr = clEnqueueWriteBuffer(this->queue, this->patchesGeoCL, CL_TRUE, //blocking write 0, this->model->getPatchesCount() * sizeof (cl_float8), this->raw_patchesGeo, 0, 0, 0); CheckOpenCLError(ciErr, "Copy patches geometry"); this->indicesCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->maxWorkGroupSize * sizeof (cl_uint), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer indicesCL"); this->indicesCountCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, sizeof (cl_uint), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer indicesCountCL"); this->maximalEnergyCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, sizeof (cl_float), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer maximalEnergyCL"); this->diffColorsCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->model->getPatchesCount() * sizeof (cl_uchar3), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer diffColorsCL"); cl_uchar3* zeros = new cl_uchar3[this->model->getPatchesCount()]; memset(zeros, 0, this->model->getPatchesCount() * sizeof (cl_uchar3)); ciErr = clEnqueueWriteBuffer(this->queue, this->diffColorsCL, CL_TRUE, //blocking write 0, this->model->getPatchesCount() * sizeof (cl_uchar3), zeros, 0, 0, 0); CheckOpenCLError(ciErr, "Clear diff colors"); delete [] zeros; this->intensitiesCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->model->getPatchesCount() * sizeof (cl_float), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer intensitiesCL"); cl_float* zeroIntensity = new cl_float[this->model->getPatchesCount()]; memset(zeroIntensity, 0, this->model->getPatchesCount() * sizeof (cl_float)); ciErr = clEnqueueWriteBuffer(this->queue, this->intensitiesCL, CL_TRUE, //blocking write 0, this->model->getPatchesCount() * sizeof (cl_float), zeroIntensity, 0, 0, 0); CheckOpenCLError(ciErr, "Clear intensities"); delete [] zeroIntensity; this->texturesCL = clCreateBuffer(this->context, CL_MEM_READ_ONLY, this->maxWorkGroupSize * 768 * 256 * sizeof (cl_uchar3), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer texturesCL"); this->visitedCL = clCreateBuffer(this->context, CL_MEM_READ_WRITE, this->maxWorkGroupSize * this->model->getPatchesCount() * sizeof (cl_bool), 0, &ciErr); CheckOpenCLError(ciErr, "CreateBuffer visitedCL"); cl_bool* zeroVisited = new cl_bool[this->maxWorkGroupSize * this->model->getPatchesCount()]; memset(zeroVisited, 0, this->maxWorkGroupSize * this->model->getPatchesCount() * sizeof (cl_bool)); ciErr = clEnqueueWriteBuffer(this->queue, this->visitedCL, CL_TRUE, //blocking write 0, this->model->getPatchesCount() * sizeof (cl_bool), zeroVisited, 0, 0, 0); CheckOpenCLError(ciErr, "Clear visited flags"); delete [] zeroVisited; free(cdDevices); return 0; }