コード例 #1
0
// Makes sure *buff refers to a read/write OpenCL buffer of exactly `size` bytes.
//
// If *buff already points to a buffer whose CL_MEM_SIZE equals `size`, it is
// kept untouched. Otherwise the previous buffer (if any) is released and a new
// CL_MEM_READ_WRITE buffer is created in the intersection device context and
// accounted through intersectionDevice->AllocMemory().
//
// buff: in/out pointer to the buffer pointer; *buff may be NULL.
// size: requested buffer size in bytes.
// desc: label used only in the log message.
void PathOCLRenderThread::AllocOCLBufferRW(cl::Buffer **buff, const size_t size, const string &desc) {
	if (*buff) {
		// Check the size of the already allocated buffer

		if (size == (*buff)->getInfo<CL_MEM_SIZE>()) {
			// I can reuse the buffer
			//LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] " << desc << " buffer reused for size: " << (size / 1024) << "Kbytes");
			return;
		}

		// The size does not match: release the old buffer before replacing it,
		// otherwise the pointer is overwritten below and the buffer is leaked.
		// NOTE(review): assumes the device exposes FreeMemory() as the
		// counterpart of AllocMemory() - confirm against the device class.
		intersectionDevice->FreeMemory((*buff)->getInfo<CL_MEM_SIZE>());
		delete *buff;
		// Do not leave a dangling pointer behind if the allocation below throws
		*buff = NULL;
	}

	cl::Context &oclContext = intersectionDevice->GetOpenCLContext();

	LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] " << desc << " buffer size: " << (size / 1024) << "Kbytes");
	*buff = new cl::Buffer(oclContext,
			CL_MEM_READ_WRITE,
			size);
	intersectionDevice->AllocMemory((*buff)->getInfo<CL_MEM_SIZE>());
}
コード例 #2
0
// Makes sure *buff refers to a read-only OpenCL buffer of exactly `size` bytes
// holding the content of `src`.
//
// If *buff already points to a buffer whose CL_MEM_SIZE equals `size`, the
// buffer is kept and only its content is refreshed with a NON-blocking write
// (CL_FALSE) on the intersection device queue: `src` must therefore stay valid
// until the queue has executed that write. Otherwise the previous buffer (if
// any) is released and a new CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR buffer is
// created from `src` and accounted through intersectionDevice->AllocMemory().
//
// buff: in/out pointer to the buffer pointer; *buff may be NULL.
// src:  host memory holding at least `size` bytes.
// size: requested buffer size in bytes.
// desc: label used only in the log message.
void PathOCLRenderThread::AllocOCLBufferRO(cl::Buffer **buff, void *src, const size_t size, const string &desc) {
	if (*buff) {
		// Check the size of the already allocated buffer

		if (size == (*buff)->getInfo<CL_MEM_SIZE>()) {
			// I can reuse the buffer; just update the content

			//LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] " << desc << " buffer updated for size: " << (size / 1024) << "Kbytes");
			cl::CommandQueue &oclQueue = intersectionDevice->GetOpenCLQueue();
			oclQueue.enqueueWriteBuffer(**buff, CL_FALSE, 0, size, src);
			return;
		}

		// The size does not match: release the old buffer before replacing it,
		// otherwise the pointer is overwritten below and the buffer is leaked.
		// NOTE(review): assumes the device exposes FreeMemory() as the
		// counterpart of AllocMemory() - confirm against the device class.
		intersectionDevice->FreeMemory((*buff)->getInfo<CL_MEM_SIZE>());
		delete *buff;
		// Do not leave a dangling pointer behind if the allocation below throws
		*buff = NULL;
	}

	cl::Context &oclContext = intersectionDevice->GetOpenCLContext();

	LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] " << desc << " buffer size: " << (size / 1024) << "Kbytes");
	*buff = new cl::Buffer(oclContext,
			CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
			size, src);
	intersectionDevice->AllocMemory((*buff)->getInfo<CL_MEM_SIZE>());
}
コード例 #3
0
void PathOCLRenderThread::InitRender() {
	Scene *scene = renderEngine->renderConfig->scene;

	cl::Context &oclContext = intersectionDevice->GetOpenCLContext();
	cl::Device &oclDevice = intersectionDevice->GetOpenCLDevice();
	const OpenCLDeviceDescription *deviceDesc = intersectionDevice->GetDeviceDesc();

	double tStart, tEnd;

	//--------------------------------------------------------------------------
	// FrameBuffer definition
	//--------------------------------------------------------------------------

	InitFrameBuffer();

	//--------------------------------------------------------------------------
	// Camera definition
	//--------------------------------------------------------------------------

	InitCamera();

	//--------------------------------------------------------------------------
	// Scene geometry
	//--------------------------------------------------------------------------

	InitGeometry();

	//--------------------------------------------------------------------------
	// Translate material definitions
	//--------------------------------------------------------------------------

	InitMaterials();

	//--------------------------------------------------------------------------
	// Translate area lights
	//--------------------------------------------------------------------------

	InitAreaLights();

	//--------------------------------------------------------------------------
	// Check if there is an infinite light source
	//--------------------------------------------------------------------------

	InitInfiniteLight();

	//--------------------------------------------------------------------------
	// Check if there is an sun light source
	//--------------------------------------------------------------------------

	InitSunLight();

	//--------------------------------------------------------------------------
	// Check if there is an sky light source
	//--------------------------------------------------------------------------

	InitSkyLight();

	const unsigned int areaLightCount = renderEngine->compiledScene->areaLights.size();
	if (!skyLightBuff && !sunLightBuff && !infiniteLightBuff && (areaLightCount == 0))
		throw runtime_error("There are no light sources supported by PathOCL in the scene");

	//--------------------------------------------------------------------------
	// Translate mesh texture maps
	//--------------------------------------------------------------------------

	InitTextureMaps();

	//--------------------------------------------------------------------------
	// Allocate Ray/RayHit buffers
	//--------------------------------------------------------------------------

	const unsigned int taskCount = renderEngine->taskCount;

	tStart = WallClockTime();

	LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Ray buffer size: " << (sizeof(Ray) * taskCount / 1024) << "Kbytes");
	raysBuff = new cl::Buffer(oclContext,
			CL_MEM_READ_WRITE,
			sizeof(Ray) * taskCount);
	deviceDesc->AllocMemory(raysBuff->getInfo<CL_MEM_SIZE>());

	LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] RayHit buffer size: " << (sizeof(RayHit) * taskCount / 1024) << "Kbytes");
	hitsBuff = new cl::Buffer(oclContext,
			CL_MEM_READ_WRITE,
			sizeof(RayHit) * taskCount);
	deviceDesc->AllocMemory(hitsBuff->getInfo<CL_MEM_SIZE>());

	tEnd = WallClockTime();
	LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] OpenCL buffer creation time: " << int((tEnd - tStart) * 1000.0) << "ms");

	//--------------------------------------------------------------------------
	// Allocate GPU task buffers
	//--------------------------------------------------------------------------

	// TODO: clenup all this mess

	const size_t gpuTaksSizePart1 =
		// Seed size
		sizeof(PathOCL::Seed);

	const size_t uDataEyePathVertexSize =
		// IDX_SCREEN_X, IDX_SCREEN_Y
		sizeof(float) * 2 +
		// IDX_DOF_X, IDX_DOF_Y
		((scene->camera->lensRadius > 0.f) ? (sizeof(float) * 2) : 0);
	const size_t uDataPerPathVertexSize =
		// IDX_TEX_ALPHA,
		((texMapAlphaBuff) ? sizeof(float) : 0) +
		// IDX_BSDF_X, IDX_BSDF_Y, IDX_BSDF_Z
		sizeof(float) * 3 +
		// IDX_DIRECTLIGHT_X, IDX_DIRECTLIGHT_Y, IDX_DIRECTLIGHT_Z
		(((areaLightCount > 0) || sunLightBuff) ? (sizeof(float) * 3) : 0) +
		// IDX_RR
		sizeof(float);
	const size_t uDataSize = (renderEngine->sampler->type == PathOCL::INLINED_RANDOM) ?
		// Only IDX_SCREEN_X, IDX_SCREEN_Y
		(sizeof(float) * 2) :
		((renderEngine->sampler->type == PathOCL::METROPOLIS) ?
			(sizeof(float) * 2 + sizeof(unsigned int) * 5 + sizeof(Spectrum) + 2 * (uDataEyePathVertexSize + uDataPerPathVertexSize * renderEngine->maxPathDepth)) :
			(uDataEyePathVertexSize + uDataPerPathVertexSize * renderEngine->maxPathDepth));

	size_t sampleSize =
		// uint pixelIndex;
		((renderEngine->sampler->type == PathOCL::METROPOLIS) ? 0 : sizeof(unsigned int)) +
		uDataSize +
		// Spectrum radiance;
		sizeof(Spectrum);

	stratifiedDataSize = 0;
	if (renderEngine->sampler->type == PathOCL::STRATIFIED) {
		PathOCL::StratifiedSampler *s = (PathOCL::StratifiedSampler *)renderEngine->sampler;
		stratifiedDataSize =
				// stratifiedScreen2D
				sizeof(float) * s->xSamples * s->ySamples * 2 +
				// stratifiedDof2D
				((scene->camera->lensRadius > 0.f) ? (sizeof(float) * s->xSamples * s->ySamples * 2) : 0) +
				// stratifiedAlpha1D
				((texMapAlphaBuff) ? (sizeof(float) * s->xSamples) : 0) +
				// stratifiedBSDF2D
				sizeof(float) * s->xSamples * s->ySamples * 2 +
				// stratifiedBSDF1D
				sizeof(float) * s->xSamples +
				// stratifiedLight2D
				// stratifiedLight1D
				(((areaLightCount > 0) || sunLightBuff) ? (sizeof(float) * s->xSamples * s->ySamples * 2 + sizeof(float) * s->xSamples) : 0);

		sampleSize += stratifiedDataSize;
	}

	const size_t gpuTaksSizePart2 = sampleSize;

	const size_t gpuTaksSizePart3 =
		// PathState size
		((((areaLightCount > 0) || sunLightBuff) ? sizeof(PathOCL::PathStateDL) : sizeof(PathOCL::PathState)) +
			//unsigned int diffuseVertexCount;
			((renderEngine->maxDiffusePathVertexCount < renderEngine->maxPathDepth) ? sizeof(unsigned int) : 0));

	const size_t gpuTaksSize = gpuTaksSizePart1 + gpuTaksSizePart2 + gpuTaksSizePart3;
	LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Size of a GPUTask: " << gpuTaksSize <<
			"bytes (" << gpuTaksSizePart1 << " + " << gpuTaksSizePart2 << " + " << gpuTaksSizePart3 << ")");
	LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Tasks buffer size: " << (gpuTaksSize * taskCount / 1024) << "Kbytes");

	// Check if the task buffer is too big
	if (oclDevice.getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>() < gpuTaksSize * taskCount) {
		stringstream ss;
		ss << "The GPUTask buffer is too big for this device (i.e. CL_DEVICE_MAX_MEM_ALLOC_SIZE=" <<
				oclDevice.getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>() <<
				"): try to reduce opencl.task.count and/or path.maxdepth and/or to change Sampler";
		throw std::runtime_error(ss.str());
	}

	tasksBuff = new cl::Buffer(oclContext,
			CL_MEM_READ_WRITE,
			gpuTaksSize * taskCount);
	deviceDesc->AllocMemory(tasksBuff->getInfo<CL_MEM_SIZE>());

	//--------------------------------------------------------------------------
	// Allocate GPU task statistic buffers
	//--------------------------------------------------------------------------

	LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Task Stats buffer size: " << (sizeof(PathOCL::GPUTaskStats) * taskCount / 1024) << "Kbytes");
	taskStatsBuff = new cl::Buffer(oclContext,
			CL_MEM_READ_WRITE,
			sizeof(PathOCL::GPUTaskStats) * taskCount);
	deviceDesc->AllocMemory(taskStatsBuff->getInfo<CL_MEM_SIZE>());

	//--------------------------------------------------------------------------
	// Compile kernels
	//--------------------------------------------------------------------------

	InitKernels();

	//--------------------------------------------------------------------------
	// Initialize
	//--------------------------------------------------------------------------

	// Set kernel arguments
	SetKernelArgs();

	cl::CommandQueue &oclQueue = intersectionDevice->GetOpenCLQueue();

	// Clear the frame buffer
	oclQueue.enqueueNDRangeKernel(*initFBKernel, cl::NullRange,
			cl::NDRange(RoundUp<unsigned int>(frameBufferPixelCount, initFBWorkGroupSize)),
			cl::NDRange(initFBWorkGroupSize));

	// Initialize the tasks buffer
	oclQueue.enqueueNDRangeKernel(*initKernel, cl::NullRange,
			cl::NDRange(taskCount), cl::NDRange(initWorkGroupSize));
	oclQueue.finish();

	// Reset statistics in order to be more accurate
	intersectionDevice->ResetPerformaceStats();
}
コード例 #4
0
// Builds the OpenCL kernels used by this render thread.
//
// The function first assembles the list of "-D PARAM_..." compiler options
// from the render engine settings, the compiled scene and the buffers that
// exist on this thread. If that option string equals the one stored in
// kernelsParameters, the existing kernels are kept. Otherwise the kernel
// sources are compiled through kernelCache and the Init, InitFrameBuffer,
// Sampler and AdvancePaths kernels (and their work group sizes) are replaced.
//
// kernelsParameters is updated only after every kernel has been created, so a
// failed build is not mistaken for an up-to-date one on the next call.
//
// Throws std::runtime_error if the compilation fails or if, with the
// stratified sampler, the device local memory is too small even for the
// smallest work group size tried.
void PathOCLRenderThread::InitKernels() {
	//--------------------------------------------------------------------------
	// Compile kernels
	//--------------------------------------------------------------------------

	CompiledScene *cscene = renderEngine->compiledScene;
	cl::Context &oclContext = intersectionDevice->GetOpenCLContext();
	cl::Device &oclDevice = intersectionDevice->GetOpenCLDevice();

	// Set #define symbols. The order of the options is significant: the
	// resulting string is compared with kernelsParameters below.
	stringstream ss;
	ss.precision(6);
	ss << scientific <<
			" -D PARAM_TASK_COUNT=" << renderEngine->taskCount <<
			" -D PARAM_IMAGE_WIDTH=" << renderEngine->film->GetWidth() <<
			" -D PARAM_IMAGE_HEIGHT=" << renderEngine->film->GetHeight() <<
			" -D PARAM_RAY_EPSILON=" << renderEngine->epsilon << "f" <<
			" -D PARAM_SEED=" << seed <<
			" -D PARAM_MAX_PATH_DEPTH=" << renderEngine->maxPathDepth <<
			" -D PARAM_MAX_DIFFUSE_PATH_VERTEX_COUNT=" << renderEngine->maxDiffusePathVertexCount <<
			" -D PARAM_RR_DEPTH=" << renderEngine->rrDepth <<
			" -D PARAM_RR_CAP=" << renderEngine->rrImportanceCap << "f"
			;

	// Accelerator structure used by the scene data set
	switch (renderEngine->renderConfig->scene->dataSet->GetAcceleratorType()) {
		case ACCEL_BVH:
			ss << " -D PARAM_ACCEL_BVH";
			break;
		case ACCEL_QBVH:
			ss << " -D PARAM_ACCEL_QBVH";
			break;
		case ACCEL_MQBVH:
			ss << " -D PARAM_ACCEL_MQBVH";
			break;
		default:
			assert (false);
	}

	// Enable only the material types flagged in the compiled scene
	if (cscene->enable_MAT_MATTE)
		ss << " -D PARAM_ENABLE_MAT_MATTE";
	if (cscene->enable_MAT_AREALIGHT)
		ss << " -D PARAM_ENABLE_MAT_AREALIGHT";
	if (cscene->enable_MAT_MIRROR)
		ss << " -D PARAM_ENABLE_MAT_MIRROR";
	if (cscene->enable_MAT_GLASS)
		ss << " -D PARAM_ENABLE_MAT_GLASS";
	if (cscene->enable_MAT_MATTEMIRROR)
		ss << " -D PARAM_ENABLE_MAT_MATTEMIRROR";
	if (cscene->enable_MAT_METAL)
		ss << " -D PARAM_ENABLE_MAT_METAL";
	if (cscene->enable_MAT_MATTEMETAL)
		ss << " -D PARAM_ENABLE_MAT_MATTEMETAL";
	if (cscene->enable_MAT_ALLOY)
		ss << " -D PARAM_ENABLE_MAT_ALLOY";
	if (cscene->enable_MAT_ARCHGLASS)
		ss << " -D PARAM_ENABLE_MAT_ARCHGLASS";

	if (cscene->camera.lensRadius > 0.f)
		ss << " -D PARAM_CAMERA_HAS_DOF";


	// Light sources: driven by which light buffers exist
	if (infiniteLightBuff)
		ss << " -D PARAM_HAS_INFINITELIGHT";

	if (skyLightBuff)
		ss << " -D PARAM_HAS_SKYLIGHT";

	if (sunLightBuff) {
		ss << " -D PARAM_HAS_SUNLIGHT";

		// With a sun light but no area lights, direct light sampling is
		// still enabled, with a light count of 0
		if (!areaLightsBuff) {
			ss <<
				" -D PARAM_DIRECT_LIGHT_SAMPLING" <<
				" -D PARAM_DL_LIGHT_COUNT=0"
				;
		}
	}

	if (areaLightsBuff) {
		ss <<
				" -D PARAM_DIRECT_LIGHT_SAMPLING" <<
				" -D PARAM_DL_LIGHT_COUNT=" << renderEngine->compiledScene->areaLights.size()
				;
	}

	// Texture, bump and normal maps: driven by which buffers exist
	if (texMapRGBBuff || texMapAlphaBuff)
		ss << " -D PARAM_HAS_TEXTUREMAPS";
	if (texMapAlphaBuff)
		ss << " -D PARAM_HAS_ALPHA_TEXTUREMAPS";
	if (meshBumpsBuff)
		ss << " -D PARAM_HAS_BUMPMAPS";
	if (meshNormalMapsBuff)
		ss << " -D PARAM_HAS_NORMALMAPS";

	// Image filter type and its parameters
	const PathOCL::Filter *filter = renderEngine->filter;
	switch (filter->type) {
		case PathOCL::NONE:
			ss << " -D PARAM_IMAGE_FILTER_TYPE=0";
			break;
		case PathOCL::BOX:
			ss << " -D PARAM_IMAGE_FILTER_TYPE=1" <<
					" -D PARAM_IMAGE_FILTER_WIDTH_X=" << filter->widthX << "f" <<
					" -D PARAM_IMAGE_FILTER_WIDTH_Y=" << filter->widthY << "f";
			break;
		case PathOCL::GAUSSIAN:
			ss << " -D PARAM_IMAGE_FILTER_TYPE=2" <<
					" -D PARAM_IMAGE_FILTER_WIDTH_X=" << filter->widthX << "f" <<
					" -D PARAM_IMAGE_FILTER_WIDTH_Y=" << filter->widthY << "f" <<
					" -D PARAM_IMAGE_FILTER_GAUSSIAN_ALPHA=" << ((PathOCL::GaussianFilter *)filter)->alpha << "f";
			break;
		case PathOCL::MITCHELL:
			ss << " -D PARAM_IMAGE_FILTER_TYPE=3" <<
					" -D PARAM_IMAGE_FILTER_WIDTH_X=" << filter->widthX << "f" <<
					" -D PARAM_IMAGE_FILTER_WIDTH_Y=" << filter->widthY << "f" <<
					" -D PARAM_IMAGE_FILTER_MITCHELL_B=" << ((PathOCL::MitchellFilter *)filter)->B << "f" <<
					" -D PARAM_IMAGE_FILTER_MITCHELL_C=" << ((PathOCL::MitchellFilter *)filter)->C << "f";
			break;
		default:
			assert (false);
	}

	if (renderEngine->usePixelAtomics)
		ss << " -D PARAM_USE_PIXEL_ATOMICS";

	// Sampler type and its parameters
	const PathOCL::Sampler *sampler = renderEngine->sampler;
	switch (sampler->type) {
		case PathOCL::INLINED_RANDOM:
			ss << " -D PARAM_SAMPLER_TYPE=0";
			break;
		case PathOCL::RANDOM:
			ss << " -D PARAM_SAMPLER_TYPE=1";
			break;
		case PathOCL::METROPOLIS:
			ss << " -D PARAM_SAMPLER_TYPE=2" <<
					" -D PARAM_SAMPLER_METROPOLIS_LARGE_STEP_RATE=" << ((PathOCL::MetropolisSampler *)sampler)->largeStepRate << "f" <<
					" -D PARAM_SAMPLER_METROPOLIS_MAX_CONSECUTIVE_REJECT=" << ((PathOCL::MetropolisSampler *)sampler)->maxConsecutiveReject <<
					" -D PARAM_SAMPLER_METROPOLIS_IMAGE_MUTATION_RANGE=" << ((PathOCL::MetropolisSampler *)sampler)->imageMutationRate << "f";
			break;
		case PathOCL::STRATIFIED:
			ss << " -D PARAM_SAMPLER_TYPE=3" <<
					" -D PARAM_SAMPLER_STRATIFIED_X_SAMPLES=" << ((PathOCL::StratifiedSampler *)sampler)->xSamples <<
					" -D PARAM_SAMPLER_STRATIFIED_Y_SAMPLES=" << ((PathOCL::StratifiedSampler *)sampler)->ySamples;
			break;
		default:
			assert (false);
	}

	// Check the OpenCL vendor and use some specific compiler options

#if defined(__APPLE__) // OSX version detection
	{
		struct utsname retval;
		uname(&retval);
		// Matches a release string starting with '1' followed by a character
		// lower than '1' (e.g. "10.x")
		if(retval.release[0] == '1' && retval.release[1] < '1') // result < darwin 11
			ss << " -D __APPLE_FIX__";
	}
#endif

	//--------------------------------------------------------------------------

	const double tStart = WallClockTime();

	// Check if I have to recompile the kernels
	string newKernelParameters = ss.str();
	if (kernelsParameters != newKernelParameters) {
		// Compile sources
		stringstream ssKernel;
		ssKernel <<
			_LUXRAYS_UV_OCLDEFINE
			_LUXRAYS_SPECTRUM_OCLDEFINE
			_LUXRAYS_POINT_OCLDEFINE
			_LUXRAYS_VECTOR_OCLDEFINE
			_LUXRAYS_TRIANGLE_OCLDEFINE
			_LUXRAYS_RAY_OCLDEFINE
			_LUXRAYS_RAYHIT_OCLDEFINE <<
			KernelSource_PathOCL_kernel_datatypes <<
			KernelSource_PathOCL_kernel_core <<
			KernelSource_PathOCL_kernel_filters <<
			KernelSource_PathOCL_kernel_scene <<
			KernelSource_PathOCL_kernel_samplers <<
			KernelSource_PathOCL_kernels;
		string kernelSource = ssKernel.str();

		LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Defined symbols: " << newKernelParameters);
		LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Compiling kernels ");

		bool cached = false;
		cl::STRING_CLASS error;
		cl::Program *program = kernelCache->Compile(oclContext, oclDevice,
				newKernelParameters, kernelSource,
				&cached, &error);

		if (!program) {
			LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] PathOCL kernel compilation error" << std::endl << error);

			throw std::runtime_error("PathOCL kernel compilation error");
		}

		if (cached) {
			LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Kernels cached");
		} else {
			LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Kernels not cached");
		}

		// NOTE(review): if anything below throws, `program` is not released;
		// an owning wrapper would be needed to close that leak.

		//----------------------------------------------------------------------
		// Init kernel
		//----------------------------------------------------------------------

		delete initKernel;
		// Keep the pointer valid for a later retry if the creation throws
		initKernel = NULL;
		LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Compiling Init Kernel");
		initKernel = new cl::Kernel(*program, "Init");
		initKernel->getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &initWorkGroupSize);

		if (intersectionDevice->GetForceWorkGroupSize() > 0)
			initWorkGroupSize = intersectionDevice->GetForceWorkGroupSize();
		else if (renderEngine->sampler->type == PathOCL::STRATIFIED) {
			// Resize the workgroup to have enough local memory
			size_t localMem = oclDevice.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();

			// Halve the work group size (never below 64) until the stratified
			// data of a whole work group fits in local memory
			while ((initWorkGroupSize > 64) && (stratifiedDataSize * initWorkGroupSize > localMem))
				initWorkGroupSize /= 2;

			if (stratifiedDataSize * initWorkGroupSize > localMem)
				throw std::runtime_error("Not enough local memory to run, try to reduce path.sampler.xsamples and path.sampler.ysamples values");

			LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Cap work group size to: " << initWorkGroupSize);
		}

		//--------------------------------------------------------------------------
		// InitFB kernel
		//--------------------------------------------------------------------------

		delete initFBKernel;
		initFBKernel = NULL;
		initFBKernel = new cl::Kernel(*program, "InitFrameBuffer");
		initFBKernel->getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &initFBWorkGroupSize);
		if (intersectionDevice->GetForceWorkGroupSize() > 0)
			initFBWorkGroupSize = intersectionDevice->GetForceWorkGroupSize();

		//----------------------------------------------------------------------
		// Sampler kernel
		//----------------------------------------------------------------------

		delete samplerKernel;
		samplerKernel = NULL;
		LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Compiling Sampler Kernel");
		samplerKernel = new cl::Kernel(*program, "Sampler");
		samplerKernel->getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &samplerWorkGroupSize);

		if (intersectionDevice->GetForceWorkGroupSize() > 0)
			samplerWorkGroupSize = intersectionDevice->GetForceWorkGroupSize();
		else if (renderEngine->sampler->type == PathOCL::STRATIFIED) {
			// Resize the workgroup to have enough local memory
			size_t localMem = oclDevice.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();

			while ((samplerWorkGroupSize > 64) && (stratifiedDataSize * samplerWorkGroupSize > localMem))
				samplerWorkGroupSize /= 2;

			if (stratifiedDataSize * samplerWorkGroupSize > localMem)
				throw std::runtime_error("Not enough local memory to run, try to reduce path.sampler.xsamples and path.sampler.ysamples values");

			LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Cap work group size to: " << samplerWorkGroupSize);
		}

		//----------------------------------------------------------------------
		// AdvancePaths kernel
		//----------------------------------------------------------------------

		delete advancePathsKernel;
		advancePathsKernel = NULL;
		LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Compiling AdvancePaths Kernel");
		advancePathsKernel = new cl::Kernel(*program, "AdvancePaths");
		advancePathsKernel->getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &advancePathsWorkGroupSize);
		if (intersectionDevice->GetForceWorkGroupSize() > 0)
			advancePathsWorkGroupSize = intersectionDevice->GetForceWorkGroupSize();

		//----------------------------------------------------------------------

		const double tEnd = WallClockTime();
		LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Kernels compilation time: " << int((tEnd - tStart) * 1000.0) << "ms");

		delete program;

		// All the kernels have been built: only now remember the options they
		// were built with
		kernelsParameters = newKernelParameters;
	} else
		LM_LOG_ENGINE("[PathOCLRenderThread::" << threadIndex << "] Using cached kernels");
}
コード例 #5
0
// Body of the rendering thread.
//
// After draining the queue and waiting on the render start barrier, the thread
// loops until a boost thread interruption is requested. Each outer iteration
// enqueues an asynchronous read of the frame buffer and of the GPU task
// statistics, then keeps enqueuing batches of Sampler / trace rays /
// AdvancePaths work until the screen refresh interval has elapsed.
// On exit the frame buffer is read back one last time with a blocking read.
//
// OpenCL errors are logged and never propagated out of the thread function.
//
// renderThread: the render thread object this function works on.
void PathOCLRenderThread::RenderThreadImpl(PathOCLRenderThread *renderThread) {
	//LM_LOG_ENGINE("[PathOCLRenderThread::" << renderThread->threadIndex << "] Rendering thread started");
	cl::CommandQueue &oclQueue = renderThread->intersectionDevice->GetOpenCLQueue();
	const unsigned int taskCount = renderThread->renderEngine->taskCount;

	oclQueue.finish();
	// Wait for the signal to start the rendering
	renderThread->renderEngine->renderStartBarrier->wait();

	try {
		double startTime = WallClockTime();
		while (!boost::this_thread::interruption_requested()) {
			/*if(renderThread->threadIndex == 0)
				cerr<< "[DEBUG] =================================");*/

			// Async. transfer of the frame buffer
			oclQueue.enqueueReadBuffer(
				*(renderThread->frameBufferBuff),
				CL_FALSE,
				0,
				renderThread->frameBufferBuff->getInfo<CL_MEM_SIZE>(),
				renderThread->frameBuffer);

			// Async. transfer of GPU task statistics
			oclQueue.enqueueReadBuffer(
				*(renderThread->taskStatsBuff),
				CL_FALSE,
				0,
				sizeof(PathOCL::GPUTaskStats) * taskCount,
				renderThread->gpuTaskStats);

			for (;;) {
				cl::Event event;

				// Decide how many kernels to enqueue. The interval is read on
				// every pass, so a new value is picked up by the next batch.
				const unsigned int screenRefreshInterval = renderThread->renderEngine->renderConfig->GetScreenRefreshInterval();

				unsigned int iterations;
				if (screenRefreshInterval <= 100)
					iterations = 1;
				else if (screenRefreshInterval <= 500)
					iterations = 2;
				else if (screenRefreshInterval <= 1000)
					iterations = 4;
				else
					iterations = 8;

				for (unsigned int i = 0; i < iterations; ++i) {
					// Generate the samples and paths. Only the first Sampler
					// kernel of the batch is bound to the event waited below.
					if (i == 0)
						oclQueue.enqueueNDRangeKernel(*(renderThread->samplerKernel), cl::NullRange,
								cl::NDRange(taskCount), cl::NDRange(renderThread->samplerWorkGroupSize),
								NULL, &event);
					else
						oclQueue.enqueueNDRangeKernel(*(renderThread->samplerKernel), cl::NullRange,
								cl::NDRange(taskCount), cl::NDRange(renderThread->samplerWorkGroupSize));

					// Trace rays
					renderThread->intersectionDevice->EnqueueTraceRayBuffer(*(renderThread->raysBuff),
								*(renderThread->hitsBuff), taskCount, NULL, NULL);

					// Advance to next path state
					oclQueue.enqueueNDRangeKernel(*(renderThread->advancePathsKernel), cl::NullRange,
							cl::NDRange(taskCount), cl::NDRange(renderThread->advancePathsWorkGroupSize));
				}
				oclQueue.flush();

				// Waits for the first Sampler kernel of the batch only, not
				// for the whole batch
				event.wait();
				const double elapsedTime = WallClockTime() - startTime;

				/*if(renderThread->threadIndex == 0)
					cerr<< "[DEBUG] Elapsed time: " << elapsedTime * 1000.0 <<
							"ms (screenRefreshInterval: " << renderThread->renderEngine->screenRefreshInterval << ")");*/

				// elapsedTime is compared in milliseconds
				if ((elapsedTime * 1000.0 > (double)screenRefreshInterval) ||
						boost::this_thread::interruption_requested())
					break;
			}

			startTime = WallClockTime();
		}

		//LM_LOG_ENGINE("[PathOCLRenderThread::" << renderThread->threadIndex << "] Rendering thread halted");
	} catch (const boost::thread_interrupted &) {
		LM_LOG_ENGINE("[PathOCLRenderThread::" << renderThread->threadIndex << "] Rendering thread halted");
	} catch (const cl::Error &err) {
		LM_LOG_ENGINE("[PathOCLRenderThread::" << renderThread->threadIndex << "] Rendering thread ERROR: " << err.what() <<
				"(" << luxrays::utils::oclErrorString(err.err()) << ")");
	}

	// Final blocking read of the frame buffer. It is guarded too: an OpenCL
	// error here (for instance after a failure in the loop above) must not
	// escape from the thread function.
	try {
		oclQueue.enqueueReadBuffer(
				*(renderThread->frameBufferBuff),
				CL_TRUE,
				0,
				renderThread->frameBufferBuff->getInfo<CL_MEM_SIZE>(),
				renderThread->frameBuffer);
	} catch (const cl::Error &err) {
		LM_LOG_ENGINE("[PathOCLRenderThread::" << renderThread->threadIndex << "] Rendering thread ERROR: " << err.what() <<
				"(" << luxrays::utils::oclErrorString(err.err()) << ")");
	}
}