/**
 * Makes sure *buff points to a read/write OpenCL buffer of exactly `size` bytes.
 *
 * If *buff is already allocated and its CL_MEM_SIZE equals `size`, the buffer
 * is reused untouched. Otherwise any previous buffer is destroyed and a new
 * CL_MEM_READ_WRITE buffer is created in the intersection device context; its
 * size is then reported through intersectionDevice->AllocMemory().
 *
 * @param buff in/out slot holding the buffer pointer (the slot itself is
 *             dereferenced unconditionally, so it must not be NULL).
 * @param size requested buffer size in bytes.
 * @param desc label used in the log message.
 */
void PathOCLRenderThread::AllocOCLBufferRW(cl::Buffer **buff, const size_t size, const string &desc) {
	if (*buff) {
		// Check the size of the already allocated buffer
		if (size == (*buff)->getInfo<CL_MEM_SIZE>()) {
			// I can reuse the buffer
			return;
		}

		// Wrong size: destroy the stale buffer instead of overwriting the
		// pointer (which leaked it). The slot is cleared first so it never
		// dangles if the allocation below throws.
		// NOTE(review): the bytes previously reported through AllocMemory() for
		// the old buffer are not subtracted here; no release counterpart is
		// visible in this part of the file - confirm and call it if one exists.
		delete *buff;
		*buff = NULL;
	}

	cl::Context &oclContext = intersectionDevice->GetOpenCLContext();

	LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] " << desc << " buffer size: " << (size / 1024) << "Kbytes");
	*buff = new cl::Buffer(oclContext,
			CL_MEM_READ_WRITE,
			size);
	intersectionDevice->AllocMemory((*buff)->getInfo<CL_MEM_SIZE>());
}
/**
 * Makes sure *buff points to a read-only OpenCL buffer of exactly `size` bytes
 * holding a copy of the host data at `src`.
 *
 * If *buff is already allocated and its CL_MEM_SIZE equals `size`, only its
 * content is refreshed with a non-blocking enqueueWriteBuffer(). Otherwise any
 * previous buffer is destroyed and a new CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR
 * buffer is created from `src`; its size is then reported through
 * intersectionDevice->AllocMemory().
 *
 * @param buff in/out slot holding the buffer pointer (the slot itself is
 *             dereferenced unconditionally, so it must not be NULL).
 * @param src  host memory to upload; at least `size` bytes are read from it.
 *             On the reuse path the write is enqueued with CL_FALSE, so the
 *             call may return before the data has been read.
 * @param size requested buffer size in bytes.
 * @param desc label used in the log message.
 */
void PathOCLRenderThread::AllocOCLBufferRO(cl::Buffer **buff, void *src, const size_t size, const string &desc) {
	if (*buff) {
		// Check the size of the already allocated buffer
		if (size == (*buff)->getInfo<CL_MEM_SIZE>()) {
			// I can reuse the buffer; just update the content
			cl::CommandQueue &oclQueue = intersectionDevice->GetOpenCLQueue();
			oclQueue.enqueueWriteBuffer(**buff, CL_FALSE, 0, size, src);
			return;
		}

		// Wrong size: destroy the stale buffer instead of overwriting the
		// pointer (which leaked it). The slot is cleared first so it never
		// dangles if the allocation below throws.
		// NOTE(review): the bytes previously reported through AllocMemory() for
		// the old buffer are not subtracted here; no release counterpart is
		// visible in this part of the file - confirm and call it if one exists.
		delete *buff;
		*buff = NULL;
	}

	cl::Context &oclContext = intersectionDevice->GetOpenCLContext();

	LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] " << desc << " buffer size: " << (size / 1024) << "Kbytes");
	*buff = new cl::Buffer(oclContext,
			CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
			size, src);
	intersectionDevice->AllocMemory((*buff)->getInfo<CL_MEM_SIZE>());
}
void PathOCLRenderThread::InitRender() { Scene *scene = renderEngine->renderConfig->scene; cl::Context &oclContext = intersectionDevice->GetOpenCLContext(); cl::Device &oclDevice = intersectionDevice->GetOpenCLDevice(); const OpenCLDeviceDescription *deviceDesc = intersectionDevice->GetDeviceDesc(); double tStart, tEnd; //-------------------------------------------------------------------------- // FrameBuffer definition //-------------------------------------------------------------------------- InitFrameBuffer(); //-------------------------------------------------------------------------- // Camera definition //-------------------------------------------------------------------------- InitCamera(); //-------------------------------------------------------------------------- // Scene geometry //-------------------------------------------------------------------------- InitGeometry(); //-------------------------------------------------------------------------- // Translate material definitions //-------------------------------------------------------------------------- InitMaterials(); //-------------------------------------------------------------------------- // Translate area lights //-------------------------------------------------------------------------- InitAreaLights(); //-------------------------------------------------------------------------- // Check if there is an infinite light source //-------------------------------------------------------------------------- InitInfiniteLight(); //-------------------------------------------------------------------------- // Check if there is an sun light source //-------------------------------------------------------------------------- InitSunLight(); //-------------------------------------------------------------------------- // Check if there is an sky light source //-------------------------------------------------------------------------- InitSkyLight(); const unsigned int areaLightCount = 
renderEngine->compiledScene->areaLights.size(); if (!skyLightBuff && !sunLightBuff && !infiniteLightBuff && (areaLightCount == 0)) throw runtime_error("There are no light sources supported by PathOCL in the scene"); //-------------------------------------------------------------------------- // Translate mesh texture maps //-------------------------------------------------------------------------- InitTextureMaps(); //-------------------------------------------------------------------------- // Allocate Ray/RayHit buffers //-------------------------------------------------------------------------- const unsigned int taskCount = renderEngine->taskCount; tStart = WallClockTime(); LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Ray buffer size: " << (sizeof(Ray) * taskCount / 1024) << "Kbytes"); raysBuff = new cl::Buffer(oclContext, CL_MEM_READ_WRITE, sizeof(Ray) * taskCount); deviceDesc->AllocMemory(raysBuff->getInfo<CL_MEM_SIZE>()); LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] RayHit buffer size: " << (sizeof(RayHit) * taskCount / 1024) << "Kbytes"); hitsBuff = new cl::Buffer(oclContext, CL_MEM_READ_WRITE, sizeof(RayHit) * taskCount); deviceDesc->AllocMemory(hitsBuff->getInfo<CL_MEM_SIZE>()); tEnd = WallClockTime(); LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] OpenCL buffer creation time: " << int((tEnd - tStart) * 1000.0) << "ms"); //-------------------------------------------------------------------------- // Allocate GPU task buffers //-------------------------------------------------------------------------- // TODO: clenup all this mess const size_t gpuTaksSizePart1 = // Seed size sizeof(PathOCL::Seed); const size_t uDataEyePathVertexSize = // IDX_SCREEN_X, IDX_SCREEN_Y sizeof(float) * 2 + // IDX_DOF_X, IDX_DOF_Y ((scene->camera->lensRadius > 0.f) ? (sizeof(float) * 2) : 0); const size_t uDataPerPathVertexSize = // IDX_TEX_ALPHA, ((texMapAlphaBuff) ? 
sizeof(float) : 0) + // IDX_BSDF_X, IDX_BSDF_Y, IDX_BSDF_Z sizeof(float) * 3 + // IDX_DIRECTLIGHT_X, IDX_DIRECTLIGHT_Y, IDX_DIRECTLIGHT_Z (((areaLightCount > 0) || sunLightBuff) ? (sizeof(float) * 3) : 0) + // IDX_RR sizeof(float); const size_t uDataSize = (renderEngine->sampler->type == PathOCL::INLINED_RANDOM) ? // Only IDX_SCREEN_X, IDX_SCREEN_Y (sizeof(float) * 2) : ((renderEngine->sampler->type == PathOCL::METROPOLIS) ? (sizeof(float) * 2 + sizeof(unsigned int) * 5 + sizeof(Spectrum) + 2 * (uDataEyePathVertexSize + uDataPerPathVertexSize * renderEngine->maxPathDepth)) : (uDataEyePathVertexSize + uDataPerPathVertexSize * renderEngine->maxPathDepth)); size_t sampleSize = // uint pixelIndex; ((renderEngine->sampler->type == PathOCL::METROPOLIS) ? 0 : sizeof(unsigned int)) + uDataSize + // Spectrum radiance; sizeof(Spectrum); stratifiedDataSize = 0; if (renderEngine->sampler->type == PathOCL::STRATIFIED) { PathOCL::StratifiedSampler *s = (PathOCL::StratifiedSampler *)renderEngine->sampler; stratifiedDataSize = // stratifiedScreen2D sizeof(float) * s->xSamples * s->ySamples * 2 + // stratifiedDof2D ((scene->camera->lensRadius > 0.f) ? (sizeof(float) * s->xSamples * s->ySamples * 2) : 0) + // stratifiedAlpha1D ((texMapAlphaBuff) ? (sizeof(float) * s->xSamples) : 0) + // stratifiedBSDF2D sizeof(float) * s->xSamples * s->ySamples * 2 + // stratifiedBSDF1D sizeof(float) * s->xSamples + // stratifiedLight2D // stratifiedLight1D (((areaLightCount > 0) || sunLightBuff) ? (sizeof(float) * s->xSamples * s->ySamples * 2 + sizeof(float) * s->xSamples) : 0); sampleSize += stratifiedDataSize; } const size_t gpuTaksSizePart2 = sampleSize; const size_t gpuTaksSizePart3 = // PathState size ((((areaLightCount > 0) || sunLightBuff) ? sizeof(PathOCL::PathStateDL) : sizeof(PathOCL::PathState)) + //unsigned int diffuseVertexCount; ((renderEngine->maxDiffusePathVertexCount < renderEngine->maxPathDepth) ? 
sizeof(unsigned int) : 0)); const size_t gpuTaksSize = gpuTaksSizePart1 + gpuTaksSizePart2 + gpuTaksSizePart3; LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Size of a GPUTask: " << gpuTaksSize << "bytes (" << gpuTaksSizePart1 << " + " << gpuTaksSizePart2 << " + " << gpuTaksSizePart3 << ")"); LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Tasks buffer size: " << (gpuTaksSize * taskCount / 1024) << "Kbytes"); // Check if the task buffer is too big if (oclDevice.getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>() < gpuTaksSize * taskCount) { stringstream ss; ss << "The GPUTask buffer is too big for this device (i.e. CL_DEVICE_MAX_MEM_ALLOC_SIZE=" << oclDevice.getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>() << "): try to reduce opencl.task.count and/or path.maxdepth and/or to change Sampler"; throw std::runtime_error(ss.str()); } tasksBuff = new cl::Buffer(oclContext, CL_MEM_READ_WRITE, gpuTaksSize * taskCount); deviceDesc->AllocMemory(tasksBuff->getInfo<CL_MEM_SIZE>()); //-------------------------------------------------------------------------- // Allocate GPU task statistic buffers //-------------------------------------------------------------------------- LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Task Stats buffer size: " << (sizeof(PathOCL::GPUTaskStats) * taskCount / 1024) << "Kbytes"); taskStatsBuff = new cl::Buffer(oclContext, CL_MEM_READ_WRITE, sizeof(PathOCL::GPUTaskStats) * taskCount); deviceDesc->AllocMemory(taskStatsBuff->getInfo<CL_MEM_SIZE>()); //-------------------------------------------------------------------------- // Compile kernels //-------------------------------------------------------------------------- InitKernels(); //-------------------------------------------------------------------------- // Initialize //-------------------------------------------------------------------------- // Set kernel arguments SetKernelArgs(); cl::CommandQueue &oclQueue = intersectionDevice->GetOpenCLQueue(); // Clear the frame buffer 
oclQueue.enqueueNDRangeKernel(*initFBKernel, cl::NullRange, cl::NDRange(RoundUp<unsigned int>(frameBufferPixelCount, initFBWorkGroupSize)), cl::NDRange(initFBWorkGroupSize)); // Initialize the tasks buffer oclQueue.enqueueNDRangeKernel(*initKernel, cl::NullRange, cl::NDRange(taskCount), cl::NDRange(initWorkGroupSize)); oclQueue.finish(); // Reset statistics in order to be more accurate intersectionDevice->ResetPerformaceStats(); }
void PathOCLRenderThread::InitKernels() { //-------------------------------------------------------------------------- // Compile kernels //-------------------------------------------------------------------------- CompiledScene *cscene = renderEngine->compiledScene; cl::Context &oclContext = intersectionDevice->GetOpenCLContext(); cl::Device &oclDevice = intersectionDevice->GetOpenCLDevice(); // Set #define symbols stringstream ss; ss.precision(6); ss << scientific << " -D PARAM_TASK_COUNT=" << renderEngine->taskCount << " -D PARAM_IMAGE_WIDTH=" << renderEngine->film->GetWidth() << " -D PARAM_IMAGE_HEIGHT=" << renderEngine->film->GetHeight() << " -D PARAM_RAY_EPSILON=" << renderEngine->epsilon << "f" << " -D PARAM_SEED=" << seed << " -D PARAM_MAX_PATH_DEPTH=" << renderEngine->maxPathDepth << " -D PARAM_MAX_DIFFUSE_PATH_VERTEX_COUNT=" << renderEngine->maxDiffusePathVertexCount << " -D PARAM_RR_DEPTH=" << renderEngine->rrDepth << " -D PARAM_RR_CAP=" << renderEngine->rrImportanceCap << "f" ; switch (renderEngine->renderConfig->scene->dataSet->GetAcceleratorType()) { case ACCEL_BVH: ss << " -D PARAM_ACCEL_BVH"; break; case ACCEL_QBVH: ss << " -D PARAM_ACCEL_QBVH"; break; case ACCEL_MQBVH: ss << " -D PARAM_ACCEL_MQBVH"; break; default: assert (false); } if (cscene->enable_MAT_MATTE) ss << " -D PARAM_ENABLE_MAT_MATTE"; if (cscene->enable_MAT_AREALIGHT) ss << " -D PARAM_ENABLE_MAT_AREALIGHT"; if (cscene->enable_MAT_MIRROR) ss << " -D PARAM_ENABLE_MAT_MIRROR"; if (cscene->enable_MAT_GLASS) ss << " -D PARAM_ENABLE_MAT_GLASS"; if (cscene->enable_MAT_MATTEMIRROR) ss << " -D PARAM_ENABLE_MAT_MATTEMIRROR"; if (cscene->enable_MAT_METAL) ss << " -D PARAM_ENABLE_MAT_METAL"; if (cscene->enable_MAT_MATTEMETAL) ss << " -D PARAM_ENABLE_MAT_MATTEMETAL"; if (cscene->enable_MAT_ALLOY) ss << " -D PARAM_ENABLE_MAT_ALLOY"; if (cscene->enable_MAT_ARCHGLASS) ss << " -D PARAM_ENABLE_MAT_ARCHGLASS"; if (cscene->camera.lensRadius > 0.f) ss << " -D PARAM_CAMERA_HAS_DOF"; if (infiniteLightBuff) 
ss << " -D PARAM_HAS_INFINITELIGHT"; if (skyLightBuff) ss << " -D PARAM_HAS_SKYLIGHT"; if (sunLightBuff) { ss << " -D PARAM_HAS_SUNLIGHT"; if (!areaLightsBuff) { ss << " -D PARAM_DIRECT_LIGHT_SAMPLING" << " -D PARAM_DL_LIGHT_COUNT=0" ; } } if (areaLightsBuff) { ss << " -D PARAM_DIRECT_LIGHT_SAMPLING" << " -D PARAM_DL_LIGHT_COUNT=" << renderEngine->compiledScene->areaLights.size() ; } if (texMapRGBBuff || texMapAlphaBuff) ss << " -D PARAM_HAS_TEXTUREMAPS"; if (texMapAlphaBuff) ss << " -D PARAM_HAS_ALPHA_TEXTUREMAPS"; if (meshBumpsBuff) ss << " -D PARAM_HAS_BUMPMAPS"; if (meshNormalMapsBuff) ss << " -D PARAM_HAS_NORMALMAPS"; const PathOCL::Filter *filter = renderEngine->filter; switch (filter->type) { case PathOCL::NONE: ss << " -D PARAM_IMAGE_FILTER_TYPE=0"; break; case PathOCL::BOX: ss << " -D PARAM_IMAGE_FILTER_TYPE=1" << " -D PARAM_IMAGE_FILTER_WIDTH_X=" << filter->widthX << "f" << " -D PARAM_IMAGE_FILTER_WIDTH_Y=" << filter->widthY << "f"; break; case PathOCL::GAUSSIAN: ss << " -D PARAM_IMAGE_FILTER_TYPE=2" << " -D PARAM_IMAGE_FILTER_WIDTH_X=" << filter->widthX << "f" << " -D PARAM_IMAGE_FILTER_WIDTH_Y=" << filter->widthY << "f" << " -D PARAM_IMAGE_FILTER_GAUSSIAN_ALPHA=" << ((PathOCL::GaussianFilter *)filter)->alpha << "f"; break; case PathOCL::MITCHELL: ss << " -D PARAM_IMAGE_FILTER_TYPE=3" << " -D PARAM_IMAGE_FILTER_WIDTH_X=" << filter->widthX << "f" << " -D PARAM_IMAGE_FILTER_WIDTH_Y=" << filter->widthY << "f" << " -D PARAM_IMAGE_FILTER_MITCHELL_B=" << ((PathOCL::MitchellFilter *)filter)->B << "f" << " -D PARAM_IMAGE_FILTER_MITCHELL_C=" << ((PathOCL::MitchellFilter *)filter)->C << "f"; break; default: assert (false); } if (renderEngine->usePixelAtomics) ss << " -D PARAM_USE_PIXEL_ATOMICS"; const PathOCL::Sampler *sampler = renderEngine->sampler; switch (sampler->type) { case PathOCL::INLINED_RANDOM: ss << " -D PARAM_SAMPLER_TYPE=0"; break; case PathOCL::RANDOM: ss << " -D PARAM_SAMPLER_TYPE=1"; break; case PathOCL::METROPOLIS: ss << " -D 
PARAM_SAMPLER_TYPE=2" << " -D PARAM_SAMPLER_METROPOLIS_LARGE_STEP_RATE=" << ((PathOCL::MetropolisSampler *)sampler)->largeStepRate << "f" << " -D PARAM_SAMPLER_METROPOLIS_MAX_CONSECUTIVE_REJECT=" << ((PathOCL::MetropolisSampler *)sampler)->maxConsecutiveReject << " -D PARAM_SAMPLER_METROPOLIS_IMAGE_MUTATION_RANGE=" << ((PathOCL::MetropolisSampler *)sampler)->imageMutationRate << "f"; break; case PathOCL::STRATIFIED: ss << " -D PARAM_SAMPLER_TYPE=3" << " -D PARAM_SAMPLER_STRATIFIED_X_SAMPLES=" << ((PathOCL::StratifiedSampler *)sampler)->xSamples << " -D PARAM_SAMPLER_STRATIFIED_Y_SAMPLES=" << ((PathOCL::StratifiedSampler *)sampler)->ySamples; break; default: assert (false); } // Check the OpenCL vendor and use some specific compiler options #if defined(__APPLE__) // OSX version detection { struct utsname retval; uname(&retval); if(retval.release[0] == '1' && retval.release[1] < '1') // result < darwin 11 ss << " -D __APPLE_FIX__"; } #endif //-------------------------------------------------------------------------- const double tStart = WallClockTime(); // Check if I have to recompile the kernels string newKernelParameters = ss.str(); if (kernelsParameters != newKernelParameters) { kernelsParameters = newKernelParameters; // Compile sources stringstream ssKernel; ssKernel << _LUXRAYS_UV_OCLDEFINE _LUXRAYS_SPECTRUM_OCLDEFINE _LUXRAYS_POINT_OCLDEFINE _LUXRAYS_VECTOR_OCLDEFINE _LUXRAYS_TRIANGLE_OCLDEFINE _LUXRAYS_RAY_OCLDEFINE _LUXRAYS_RAYHIT_OCLDEFINE << KernelSource_PathOCL_kernel_datatypes << KernelSource_PathOCL_kernel_core << KernelSource_PathOCL_kernel_filters << KernelSource_PathOCL_kernel_scene << KernelSource_PathOCL_kernel_samplers << KernelSource_PathOCL_kernels; string kernelSource = ssKernel.str(); LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Defined symbols: " << kernelsParameters); LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Compiling kernels "); bool cached; cl::STRING_CLASS error; cl::Program *program = 
kernelCache->Compile(oclContext, oclDevice, kernelsParameters, kernelSource, &cached, &error); if (!program) { LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] PathOCL kernel compilation error" << std::endl << error); throw std::runtime_error("PathOCL kernel compilation error"); } if (cached) { LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Kernels cached"); } else { LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Kernels not cached"); } //---------------------------------------------------------------------- // Init kernel //---------------------------------------------------------------------- delete initKernel; LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Compiling Init Kernel"); initKernel = new cl::Kernel(*program, "Init"); initKernel->getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &initWorkGroupSize); if (intersectionDevice->GetForceWorkGroupSize() > 0) initWorkGroupSize = intersectionDevice->GetForceWorkGroupSize(); else if (renderEngine->sampler->type == PathOCL::STRATIFIED) { // Resize the workgroup to have enough local memory size_t localMem = oclDevice.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>(); while ((initWorkGroupSize > 64) && (stratifiedDataSize * initWorkGroupSize > localMem)) initWorkGroupSize /= 2; if (stratifiedDataSize * initWorkGroupSize > localMem) throw std::runtime_error("Not enough local memory to run, try to reduce path.sampler.xsamples and path.sampler.xsamples values"); LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Cap work group size to: " << initWorkGroupSize); } //-------------------------------------------------------------------------- // InitFB kernel //-------------------------------------------------------------------------- delete initFBKernel; initFBKernel = new cl::Kernel(*program, "InitFrameBuffer"); initFBKernel->getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &initFBWorkGroupSize); if (intersectionDevice->GetForceWorkGroupSize() > 0) 
initFBWorkGroupSize = intersectionDevice->GetForceWorkGroupSize(); //---------------------------------------------------------------------- // Sampler kernel //---------------------------------------------------------------------- delete samplerKernel; LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Compiling Sampler Kernel"); samplerKernel = new cl::Kernel(*program, "Sampler"); samplerKernel->getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &samplerWorkGroupSize); if (intersectionDevice->GetForceWorkGroupSize() > 0) samplerWorkGroupSize = intersectionDevice->GetForceWorkGroupSize(); else if (renderEngine->sampler->type == PathOCL::STRATIFIED) { // Resize the workgroup to have enough local memory size_t localMem = oclDevice.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>(); while ((samplerWorkGroupSize > 64) && (stratifiedDataSize * samplerWorkGroupSize > localMem)) samplerWorkGroupSize /= 2; if (stratifiedDataSize * samplerWorkGroupSize > localMem) throw std::runtime_error("Not enough local memory to run, try to reduce path.sampler.xsamples and path.sampler.xsamples values"); LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Cap work group size to: " << samplerWorkGroupSize); } //---------------------------------------------------------------------- // AdvancePaths kernel //---------------------------------------------------------------------- delete advancePathsKernel; LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Compiling AdvancePaths Kernel"); advancePathsKernel = new cl::Kernel(*program, "AdvancePaths"); advancePathsKernel->getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &advancePathsWorkGroupSize); if (intersectionDevice->GetForceWorkGroupSize() > 0) advancePathsWorkGroupSize = intersectionDevice->GetForceWorkGroupSize(); //---------------------------------------------------------------------- const double tEnd = WallClockTime(); LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Kernels compilation 
time: " << int((tEnd - tStart) * 1000.0) << "ms"); delete program; } else LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Using cached kernels"); }
/**
 * Body of the rendering thread.
 *
 * Waits on the engine's renderStartBarrier, then loops until a thread
 * interruption is requested. Each outer iteration enqueues non-blocking reads
 * of the frame buffer and of the GPU task statistics, then keeps enqueuing
 * batches of Sampler / trace rays / AdvancePaths work until the screen refresh
 * interval has elapsed. On exit, the frame buffer is read back one last time
 * with a blocking transfer.
 *
 * boost::thread_interrupted and cl::Error are caught and logged here, so that
 * they do not propagate out of the thread function.
 *
 * @param renderThread the thread object whose buffers, kernels and device are
 *                     used; it is dereferenced unconditionally.
 */
void PathOCLRenderThread::RenderThreadImpl(PathOCLRenderThread *renderThread) {
	//LM_LOG_ENGINE("[PathOCLRenderThread::" << renderThread->threadIndex << "] Rendering thread started");

	cl::CommandQueue &oclQueue = renderThread->intersectionDevice->GetOpenCLQueue();
	const unsigned int taskCount = renderThread->renderEngine->taskCount;

	oclQueue.finish();

	// Wait for the signal to start the rendering
	renderThread->renderEngine->renderStartBarrier->wait();

	try {
		double startTime = WallClockTime();
		while (!boost::this_thread::interruption_requested()) {
			// Async. transfer of the frame buffer
			oclQueue.enqueueReadBuffer(
				*(renderThread->frameBufferBuff),
				CL_FALSE,
				0,
				renderThread->frameBufferBuff->getInfo<CL_MEM_SIZE>(),
				renderThread->frameBuffer);

			// Async. transfer of GPU task statistics
			oclQueue.enqueueReadBuffer(
				*(renderThread->taskStatsBuff),
				CL_FALSE,
				0,
				sizeof(PathOCL::GPUTaskStats) * taskCount,
				renderThread->gpuTaskStats);

			for (;;) {
				cl::Event event;

				// Decide how many kernels to enqueue: the longer the refresh
				// interval, the more iterations are batched per flush
				const unsigned int screenRefreshInterval = renderThread->renderEngine->renderConfig->GetScreenRefreshInterval();

				unsigned int iterations;
				if (screenRefreshInterval <= 100)
					iterations = 1;
				else if (screenRefreshInterval <= 500)
					iterations = 2;
				else if (screenRefreshInterval <= 1000)
					iterations = 4;
				else
					iterations = 8;

				for (unsigned int i = 0; i < iterations; ++i) {
					// Generate the samples and paths. Only the first enqueue of
					// the batch is tracked by the event waited on below.
					if (i == 0)
						oclQueue.enqueueNDRangeKernel(*(renderThread->samplerKernel), cl::NullRange,
								cl::NDRange(taskCount), cl::NDRange(renderThread->samplerWorkGroupSize),
								NULL, &event);
					else
						oclQueue.enqueueNDRangeKernel(*(renderThread->samplerKernel), cl::NullRange,
								cl::NDRange(taskCount), cl::NDRange(renderThread->samplerWorkGroupSize));

					// Trace rays
					renderThread->intersectionDevice->EnqueueTraceRayBuffer(*(renderThread->raysBuff),
							*(renderThread->hitsBuff), taskCount, NULL, NULL);

					// Advance to next path state
					oclQueue.enqueueNDRangeKernel(*(renderThread->advancePathsKernel), cl::NullRange,
							cl::NDRange(taskCount), cl::NDRange(renderThread->advancePathsWorkGroupSize));
				}
				oclQueue.flush();

				event.wait();
				const double elapsedTime = WallClockTime() - startTime;

				// Time to refresh the host copy of the frame buffer, or to stop
				if ((elapsedTime * 1000.0 > (double)screenRefreshInterval) ||
						boost::this_thread::interruption_requested())
					break;
			}

			startTime = WallClockTime();
		}

		//LM_LOG_ENGINE("[PathOCLRenderThread::" << renderThread->threadIndex << "] Rendering thread halted");
	} catch (const boost::thread_interrupted &) {
		LM_LOG_ENGINE("[PathOCLRenderThread::" << renderThread->threadIndex << "] Rendering thread halted");
	} catch (const cl::Error &err) {
		LM_LOG_ENGINE("[PathOCLRenderThread::" << renderThread->threadIndex << "] Rendering thread ERROR: " << err.what() <<
				"(" << luxrays::utils::oclErrorString(err.err()) << ")");
	}

	// Final blocking read of the frame buffer. It is guarded too: an OpenCL
	// error here (e.g. right after the failure logged above) must not escape
	// from the thread function.
	try {
		oclQueue.enqueueReadBuffer(
			*(renderThread->frameBufferBuff),
			CL_TRUE,
			0,
			renderThread->frameBufferBuff->getInfo<CL_MEM_SIZE>(),
			renderThread->frameBuffer);
	} catch (const cl::Error &err) {
		LM_LOG_ENGINE("[PathOCLRenderThread::" << renderThread->threadIndex << "] Rendering thread ERROR: " << err.what() <<
				"(" << luxrays::utils::oclErrorString(err.err()) << ")");
	}
}