예제 #1
0
파일: ocl.cpp 프로젝트: scollinson/luxrays
cl::Program *oclKernelCache::ForcedCompile(cl::Context &context, cl::Device &device,
		const std::string &kernelsParameters, const std::string &kernelSource,
		cl::STRING_CLASS *error) {
	cl::Program *program = NULL;

	try {
		cl::Program::Sources source(1, std::make_pair(kernelSource.c_str(), kernelSource.length()));
		program = new cl::Program(context, source);

		VECTOR_CLASS<cl::Device> buildDevice;
		buildDevice.push_back(device);
		program->build(buildDevice, kernelsParameters.c_str());
	} catch (cl::Error err) {
		const std::string clerr = program->getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);

		std::stringstream ss;
		ss << "ERROR " << err.what() << "[" << luxrays::oclErrorString(err.err()) << "]:" <<
				std::endl << clerr << std::endl;
		*error = ss.str();

		if (program)
			delete program;
		program = NULL;
	}

	return program;
}
예제 #2
0
void OpenCLPrinter::printContextInfo(cl::Context context)
{
	print("--- ContextInfo ---", "");
	VECTOR_CLASS<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
	print("Number of devices", devices.size());
	for(int i=0; i<devices.size(); ++i)
		printDeviceInfo(devices[i]);
}
예제 #3
0
	OpenCLBVHKernel(OpenCLIntersectionDevice *dev) : OpenCLKernel(dev),
		vertsBuff(NULL), trisBuff(NULL), bvhBuff(NULL) {
		const Context *deviceContext = device->GetContext();
		cl::Context &oclContext = device->GetOpenCLContext();
		cl::Device &oclDevice = device->GetOpenCLDevice();
		const std::string &deviceName(device->GetName());
		// Compile sources
		std::string code(
			_LUXRAYS_POINT_OCLDEFINE
			_LUXRAYS_VECTOR_OCLDEFINE
			_LUXRAYS_RAY_OCLDEFINE
			_LUXRAYS_RAYHIT_OCLDEFINE
			_LUXRAYS_TRIANGLE_OCLDEFINE
			_LUXRAYS_BBOX_OCLDEFINE);
		code += KernelSource_BVH;
		cl::Program::Sources source(1, std::make_pair(code.c_str(), code.length()));
		cl::Program program = cl::Program(oclContext, source);
		try {
			VECTOR_CLASS<cl::Device> buildDevice;
			buildDevice.push_back(oclDevice);
			program.build(buildDevice);
		} catch (cl::Error err) {
			cl::STRING_CLASS strError = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(oclDevice);
			LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] BVH compilation error:\n" << strError.c_str());

			throw err;
		}

		delete kernel;
		kernel = new cl::Kernel(program, "Intersect");
		kernel->getWorkGroupInfo<size_t>(oclDevice,
			CL_KERNEL_WORK_GROUP_SIZE, &workGroupSize);
		LR_LOG(deviceContext, "[OpenCL device::" << deviceName <<
			"] BVH kernel work group size: " << workGroupSize);

		kernel->getWorkGroupInfo<size_t>(oclDevice,
			CL_KERNEL_WORK_GROUP_SIZE, &workGroupSize);
		LR_LOG(deviceContext, "[OpenCL device::" << deviceName <<
			"] Suggested work group size: " << workGroupSize);

		if (device->GetForceWorkGroupSize() > 0) {
			workGroupSize = device->GetForceWorkGroupSize();
			LR_LOG(deviceContext, "[OpenCL device::" << deviceName <<
				"] Forced work group size: " << workGroupSize);
		}
	}
예제 #4
0
CLDeviceSelection::CLDeviceSelection(const QString selectString, bool allowGPU, bool allowCPU)
{
	VECTOR_CLASS<cl::Platform> platforms;
	CL_DETECT_ERROR(cl::Platform::get(&platforms));
	int selectIndex = 0;
	qDebug().nospace().noquote()<<"Making OpenCL device selection with selectstring='"<< selectString<< "', allowGPU="<<allowGPU<<", allowCPU="<<allowCPU<<"";
	for (size_t i = 0; i < platforms.size(); ++i) {
		qDebug().nospace().noquote()<<"Platform-" << i << ": " << QString::fromLocal8Bit(platforms[i].getInfo<CL_PLATFORM_VENDOR>().c_str());

		// Get the list of devices available on the platform
		VECTOR_CLASS<cl::Device> devices;
		platforms[i].getDevices(CL_DEVICE_TYPE_ALL, &devices);

		for (size_t j = 0; j < devices.size(); ++j) {
			bool selected = false;
			auto type=devices[j].getInfo<CL_DEVICE_TYPE>();
			if ((allowGPU && (type == CL_DEVICE_TYPE_GPU)) || (allowCPU && (type == CL_DEVICE_TYPE_CPU))) {
				if (selectString.length() == 0) {
					selected = true;
				} else {
					if (selectString.length() <= selectIndex) {
						qWarning()<< "OpenCL select devices string (opencl.devices.select) has the wrong length";
						exit(1);
					}

					if (selectString.at(selectIndex) == '1') {
						selected = true;
					}
				}
			}
			if (selected) {
				push_back(devices[j]);
			}

			qDebug().nospace()<< " |-- Device-"<<i <<"."<<j <<": "<< devices[j]<<(selected?" [SEL]":" [---]");
			++selectIndex;

		}
	}

	if (size() == 0) {
		qWarning()<<"This program requires OpenCL enabled hardware. Unable to find any OpenCL GPU devices, so quitting";
		exit(1);
	}
}
예제 #5
0
	void SetUpOpenCL() {
		//----------------------------------------------------------------------
		// Compile kernel
		//----------------------------------------------------------------------

		const std::string &kernelFileName = commandLineOpts["kernel"].as<std::string>();
		OCLTOY_LOG("Compile OpenCL kernel: " << kernelFileName);

		// Read the kernel
		const std::string kernelSource = ReadSources(kernelFileName, "jugCLer");

		// Create the kernel program
		cl::Device &oclDevice = selectedDevices[0];
		cl::Context &oclContext = deviceContexts[0];
		cl::Program program = cl::Program(oclContext, kernelSource);
		try {
			VECTOR_CLASS<cl::Device> buildDevice;
			buildDevice.push_back(oclDevice);
			program.build(buildDevice);
		} catch (cl::Error err) {
			cl::STRING_CLASS strError = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(oclDevice);
			OCLTOY_LOG("Kernel compilation error:\n" << strError.c_str());

			throw err;
		}

		kernelsJugCLer = cl::Kernel(program, "render_gpu");
		kernelsJugCLer.getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &kernelsWorkGroupSize);
		if (commandLineOpts.count("workgroupsize"))
			kernelsWorkGroupSize = commandLineOpts["workgroupsize"].as<size_t>();
		OCLTOY_LOG("Using workgroup size: " << kernelsWorkGroupSize);

		//----------------------------------------------------------------------
		// Allocate buffer
		//----------------------------------------------------------------------

		AllocateBuffers();

		//----------------------------------------------------------------------
		// Set kernel arguments
		//----------------------------------------------------------------------

		kernelsJugCLer.setArg(0, *sceneBuff);
		kernelsJugCLer.setArg(1, *pixelsBuff);
	}
예제 #6
0
void OpenCLPixelDevice::CompileKernel(cl::Context &ctx, cl::Device &device, const std::string &src,
                                      const char *kernelName, cl::Kernel **kernel) {
    // Compile sources
    cl::Program::Sources source(1, std::make_pair(src.c_str(), src.length()));
    cl::Program program = cl::Program(ctx, source);
    try {
        VECTOR_CLASS<cl::Device> buildDevice;
        buildDevice.push_back(device);
        program.build(buildDevice, "-I.");
    } catch (cl::Error err) {
        cl::STRING_CLASS strError = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
        LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] " << kernelName << " compilation error:\n" << strError.c_str());

        throw err;
    }

    *kernel = new cl::Kernel(program, kernelName);
}
예제 #7
0
Context::Context(LuxRaysDebugHandler handler, const int openclPlatformIndex,
		const bool verb) {
	debugHandler = handler;
	currentDataSet = NULL;
	started = false;
	verbose = verb;

	// Get the list of devices available on the platform
	NativeThreadDeviceDescription::AddDeviceDescs(deviceDescriptions);

#if !defined(LUXRAYS_DISABLE_OPENCL)
	// Platform info
	VECTOR_CLASS<cl::Platform> platforms;
	cl::Platform::get(&platforms);
	for (size_t i = 0; i < platforms.size(); ++i)
		LR_LOG(this, "OpenCL Platform " << i << ": " << platforms[i].getInfo<CL_PLATFORM_VENDOR>().c_str());

	if (openclPlatformIndex < 0) {
		if (platforms.size() > 0) {
			// Just use all the platforms available
			for (size_t i = 0; i < platforms.size(); ++i)
				OpenCLDeviceDescription::AddDeviceDescs(
					platforms[i], DEVICE_TYPE_OPENCL_ALL,
					deviceDescriptions);
		} else
			LR_LOG(this, "No OpenCL platform available");
	} else {
		if ((platforms.size() == 0) || (openclPlatformIndex >= (int)platforms.size()))
			throw std::runtime_error("Unable to find an appropriate OpenCL platform");
		else {
			OpenCLDeviceDescription::AddDeviceDescs(
				platforms[openclPlatformIndex],
				DEVICE_TYPE_OPENCL_ALL, deviceDescriptions);
		}
	}
#endif

	// Print device info
	for (size_t i = 0; i < deviceDescriptions.size(); ++i) {
		DeviceDescription *desc = deviceDescriptions[i];
		LR_LOG(this, "Device " << i << " name: " <<
			desc->GetName());

		LR_LOG(this, "Device " << i << " type: " <<
			DeviceDescription::GetDeviceType(desc->GetType()));

		LR_LOG(this, "Device " << i << " compute units: " <<
			desc->GetComputeUnits());

		LR_LOG(this, "Device " << i << " preferred float vector width: " <<
			desc->GetNativeVectorWidthFloat());

		LR_LOG(this, "Device " << i << " max allocable memory: " <<
			desc->GetMaxMemory() / (1024 * 1024) << "MBytes");

		LR_LOG(this, "Device " << i << " max allocable memory block size: " <<
			desc->GetMaxMemoryAllocSize() / (1024 * 1024) << "MBytes");
	}
}
예제 #8
0
파일: ocl.cpp 프로젝트: scollinson/luxrays
cl::Program *oclKernelVolatileCache::Compile(cl::Context &context, cl::Device& device,
		const std::string &kernelsParameters, const std::string &kernelSource,
		bool *cached, cl::STRING_CLASS *error) {
	// Check if the kernel is available in the cache
	std::map<std::string, cl::Program::Binaries>::iterator it = kernelCache.find(kernelsParameters);

	if (it == kernelCache.end()) {
		// It isn't available, compile the source
		cl::Program *program = ForcedCompile(
				context, device, kernelsParameters, kernelSource, error);
		if (!program)
			return NULL;

		// Obtain the binaries of the sources
		VECTOR_CLASS<char *> bins = program->getInfo<CL_PROGRAM_BINARIES>();
		assert (bins.size() == 1);
		VECTOR_CLASS<size_t> sizes = program->getInfo<CL_PROGRAM_BINARY_SIZES>();
		assert (sizes.size() == 1);

		if (sizes[0] > 0) {
			// Add the kernel to the cache
			char *bin = new char[sizes[0]];
			memcpy(bin, bins[0], sizes[0]);
			kernels.push_back(bin);

			kernelCache[kernelsParameters] = cl::Program::Binaries(1, std::make_pair(bin, sizes[0]));
		}

		if (cached)
			*cached = false;

		return program;
	} else {
		// Compile from the binaries
		VECTOR_CLASS<cl::Device> buildDevice;
		buildDevice.push_back(device);
		cl::Program *program = new cl::Program(context, buildDevice, it->second);
		program->build(buildDevice);

		if (cached)
			*cached = true;

		return program;
	}
}
예제 #9
0
void OpenCLPrinter::printPlatformAndDeviceInfo()
{
	VECTOR_CLASS<cl::Platform> platforms;
	cl::Platform::get(&platforms);

	VECTOR_CLASS<cl::Device> devices;
    for(unsigned int i = 0; i < platforms.size(); i++)
    {
    	printPlatformInfo(platforms[i]);

		platforms[i].getDevices(CL_DEVICE_TYPE_ALL, &devices);

		for(unsigned int j = 0; j < devices.size(); j++)
		{
            printDeviceInfo(devices[j]);
		}
    }
    print("Number of platforms", platforms.size());
    print("Number of devices", devices.size());
}
예제 #10
0
파일: ocl.cpp 프로젝트: scollinson/luxrays
cl::Program *oclKernelPersistentCache::Compile(cl::Context &context, cl::Device& device,
		const std::string &kernelsParameters, const std::string &kernelSource,
		bool *cached, cl::STRING_CLASS *error) {
	// Check if the kernel is available in the cache

	cl::Platform platform = device.getInfo<CL_DEVICE_PLATFORM>();
	std::string platformName = platform.getInfo<CL_PLATFORM_VENDOR>();
	std::string deviceName = device.getInfo<CL_DEVICE_NAME>();
	std::string deviceUnits = ToString(device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>());
	std::string kernelName = HashString(kernelsParameters) + "-" + HashString(kernelSource) + ".ocl";
	std::string dirName = "kernel_cache/" + appName + "/" + platformName + "/" + deviceName + "/" + deviceUnits;
	std::string fileName = dirName +"/" +kernelName;
	
	if (!boost::filesystem::exists(fileName)) {
		// It isn't available, compile the source
		cl::Program *program = ForcedCompile(
				context, device, kernelsParameters, kernelSource, error);
		if (!program)
			return NULL;

		// Obtain the binaries of the sources
		VECTOR_CLASS<char *> bins = program->getInfo<CL_PROGRAM_BINARIES>();
		assert (bins.size() == 1);
		VECTOR_CLASS<size_t> sizes = program->getInfo<CL_PROGRAM_BINARY_SIZES >();
		assert (sizes.size() == 1);

		// Create the file only if the binaries include something
		if (sizes[0] > 0) {
			// Add the kernel to the cache
			boost::filesystem::create_directories(dirName);
			BOOST_OFSTREAM file(fileName.c_str(), std::ios_base::out | std::ios_base::binary);
			file.write(bins[0], sizes[0]);

			// Check for errors
			char buf[512];
			if (file.fail()) {
				sprintf(buf, "Unable to write kernel file cache %s", fileName.c_str());
				throw std::runtime_error(buf);
			}

			file.close();
		}

		if (cached)
			*cached = false;

		return program;
	} else {
		const size_t kernelSize = boost::filesystem::file_size(fileName);

		if (kernelSize > 0) {
			char *kernelBin = new char[kernelSize];

			BOOST_IFSTREAM file(fileName.c_str(), std::ios_base::in | std::ios_base::binary);
			file.read(kernelBin, kernelSize);

			// Check for errors
			char buf[512];
			if (file.fail()) {
				sprintf(buf, "Unable to read kernel file cache %s", fileName.c_str());
				throw std::runtime_error(buf);
			}

			file.close();

			// Compile from the binaries
			VECTOR_CLASS<cl::Device> buildDevice;
			buildDevice.push_back(device);
			cl::Program *program = new cl::Program(context, buildDevice,
					cl::Program::Binaries(1, std::make_pair(kernelBin, kernelSize)));
			program->build(buildDevice);

			if (cached)
				*cached = true;

			delete[] kernelBin;

			return program;
		} else {
			// Something wrong in the file, remove the file and retry
			boost::filesystem::remove(fileName);

			return Compile(context, device, kernelsParameters, kernelSource, cached, error);
		}
	}
}
예제 #11
0
 void printDeviceInfo(const cl::Device &device, cl_device_info info) {
     if (!initialized) {
         printf("not initialized. call initCLUtility().");
     }
     
     printf("%s: ", clDeviceInfoMetaInfos[info].name);
     
     switch (clDeviceInfoMetaInfos[info].infoType) {
         case CLDeviceInfoType_bool: {
             cl_bool val;
             device.getInfo(info, &val);
             printf(val != 0 ? "YES" : "NO");
             break;
         }
         case CLDeviceInfoType_uint: {
             cl_uint val;
             device.getInfo(info, &val);
             printf("%u", val);
             break;
         }
         case CLDeviceInfoType_ulong: {
             cl_ulong val;
             device.getInfo(info, &val);
             printf("%llu", val);
             break;
         }
         case CLDeviceInfoType_size_t: {
             size_t val;
             device.getInfo(info, &val);
             printf("%lu", val);
             break;
         }
         case CLDeviceInfoType_string: {
             std::string val;
             device.getInfo(info, &val);
             printf("%s", val.c_str());
             break;
         }
         case CLDeviceInfoType_size_t_vec: {
             VECTOR_CLASS<size_t> val;
             device.getInfo(info, &val);
             for (uint32_t i = 0; i < val.size() - 1; ++i) {
                 printf("%lu, ", val[i]);
             }
             printf("%lu", val.back());
             break;
         }
         case CLDeviceInfoType_device_id: {
             cl_device_id val;
             device.getInfo(info, &val);
             printf("%#018llx", (uint64_t)val);
             break;
         }
         case CLDeviceInfoType_platform_id: {
             cl_platform_id val;
             device.getInfo(info, &val);
             printf("%#018llx", (uint64_t)val);
             break;
         }
         case CLDeviceInfoType_device_type: {
             cl_device_type val;
             device.getInfo(info, &val);
             switch (val) {
                 case CL_DEVICE_TYPE_CPU:
                     printf("CPU");
                     break;
                 case CL_DEVICE_TYPE_GPU:
                     printf("GPU");
                     break;
                 case CL_DEVICE_TYPE_ACCELERATOR:
                     printf("Accelerator");
                     break;
                 case CL_DEVICE_TYPE_CUSTOM:
                     printf("Custom");
                     break;
                 default:
                     break;
             }
             break;
         }
         case CLDeviceInfoType_device_fp_config: {
             cl_device_fp_config val;
             device.getInfo(info, &val);
             if ((val & CL_FP_DENORM) != 0)
                 printf("CL_FP_DENORM ");
             if ((val & CL_FP_INF_NAN) != 0)
                 printf("CL_FP_INF_NAN ");
             if ((val & CL_FP_ROUND_TO_NEAREST) != 0)
                 printf("CL_FP_ROUND_TO_NEAREST ");
             if ((val & CL_FP_ROUND_TO_ZERO) != 0)
                 printf("CL_FP_ROUND_TO_ZERO ");
             if ((val & CL_FP_ROUND_TO_INF) != 0)
                 printf("CL_FP_ROUND_TO_INF ");
             if ((val & CL_FP_FMA) != 0)
                 printf("CL_FP_FMA ");
             if ((val & CL_FP_SOFT_FLOAT) != 0)
                 printf("CL_FP_SOFT_FLOAT ");
             if ((val & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) != 0)
                 printf("CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT ");
             break;
         }
         case CLDeviceInfoType_device_local_mem_type: {
             cl_device_local_mem_type val;
             device.getInfo(info, &val);
             switch (val) {
                 case CL_LOCAL:
                     printf("Local");
                     break;
                 case CL_GLOBAL:
                     printf("Global");
                 default:
                     break;
             }
             break;
         }
         case CLDeviceInfoType_device_mem_cache_type: {
             cl_device_mem_cache_type val;
             device.getInfo(info, &val);
             switch (val) {
                 case CL_NONE:
                     printf("None");
                     break;
                 case CL_READ_ONLY_CACHE:
                     printf("Read Only Cache");
                     break;
                 case CL_READ_WRITE_CACHE:
                     printf("Read Write Cache");
                     break;
                 default:
                     break;
             }
             break;
         }
         case CLDeviceInfoType_device_exec_capabilities: {
             cl_device_exec_capabilities val;
             device.getInfo(info, &val);
             if ((val & CL_EXEC_KERNEL) != 0)
                 printf("CL_EXEC_KERNEL ");
             if ((val & CL_EXEC_NATIVE_KERNEL) != 0)
                 printf("CL_EXEC_NATIVE_KERNEL ");
             break;
         }
         case CLDeviceInfoType_command_queue_properties: {
             cl_command_queue_properties val;
             device.getInfo(info, &val);
             if ((val & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0)
                 printf("CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE ");
             if ((val & CL_QUEUE_PROFILING_ENABLE) != 0)
                 printf("CL_QUEUE_PROFILING_ENABLE ");
             break;
         }
         case CLDeviceInfoType_device_affinity_domain: {
             cl_device_affinity_domain val;
             device.getInfo(info, &val);
             if ((val & CL_DEVICE_AFFINITY_DOMAIN_NUMA) != 0)
                 printf("CL_DEVICE_AFFINITY_DOMAIN_NUMA ");
             if ((val & CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE) != 0)
                 printf("CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE ");
             if ((val & CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE) != 0)
                 printf("CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE ");
             if ((val & CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE) != 0)
                 printf("CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE ");
             if ((val & CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE) != 0)
                 printf("CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE ");
             if ((val & CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE) != 0)
                 printf("CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE ");
             break;
         }
         case CLDeviceInfoType_device_partition_property_vec: {
             VECTOR_CLASS<cl_device_partition_property> val;
             device.getInfo(info, &val);
             for (uint32_t i = 0; i < val.size(); ++i) {
                 switch (val[i]) {
                     case CL_DEVICE_PARTITION_EQUALLY:
                         printf("CL_DEVICE_PARTITION_EQUALLY");
                         break;
                     case CL_DEVICE_PARTITION_BY_COUNTS:
                         printf("CL_DEVICE_PARTITION_BY_COUNTS");
                         break;
                     case CL_DEVICE_PARTITION_BY_COUNTS_LIST_END:
                         printf("CL_DEVICE_PARTITION_BY_COUNTS_LIST_END");
                         break;
                     case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN:
                         printf("CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN");
                         break;
                     default:
                         break;
                 }
                 if (i < val.size() - 1)
                     printf(", ");
             }
             break;
         }
         default:
             break;
     }
     printf("\n");
 }
예제 #12
0
OCLRendererThread::OCLRendererThread(const size_t threadIndex, OCLRenderer *renderer,
			cl::Device device) : index(threadIndex), renderer(renderer),
		dev(device), usedDeviceMemory(0) {
	const GameLevel &gameLevel(*(renderer->gameLevel));
	const unsigned int width = gameLevel.gameConfig->GetScreenWidth();
	const unsigned int height = gameLevel.gameConfig->GetScreenHeight();

	const CompiledScene &compiledScene(*(renderer->compiledScene));

	if (renderer->renderThread.size() > 1)
		cpuFrameBuffer = new FrameBuffer(width, height);
	else
		cpuFrameBuffer = NULL;

	//--------------------------------------------------------------------------
	// OpenCL setup
	//--------------------------------------------------------------------------

	// Allocate a context with the selected device

	VECTOR_CLASS<cl::Device> devices;
	devices.push_back(dev);
	cl::Platform platform = dev.getInfo<CL_DEVICE_PLATFORM>();

	// The first thread uses OpenCL/OpenGL interoperability
	if (index == 0) {
#if defined (__APPLE__)
		CGLContextObj kCGLContext = CGLGetCurrentContext();
		CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext);
		cl_context_properties cps[] = {
			CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)kCGLShareGroup,
			0
		};
#else
#ifdef WIN32
		cl_context_properties cps[] = {
			CL_GL_CONTEXT_KHR, (intptr_t)wglGetCurrentContext(),
			CL_WGL_HDC_KHR, (intptr_t)wglGetCurrentDC(),
			CL_CONTEXT_PLATFORM, (cl_context_properties)platform(),
			0
		};
#else
		cl_context_properties cps[] = {
			CL_GL_CONTEXT_KHR, (intptr_t)glXGetCurrentContext(),
			CL_GLX_DISPLAY_KHR, (intptr_t)glXGetCurrentDisplay(),
			CL_CONTEXT_PLATFORM, (cl_context_properties)platform(),
			0
		};
#endif
#endif

		ctx = new cl::Context(devices, cps);
	} else
		ctx = new cl::Context(devices);

	// Allocate the queue for this device
	cmdQueue = new cl::CommandQueue(*ctx, dev);

	//--------------------------------------------------------------------------
	// Allocate the buffers
	//--------------------------------------------------------------------------

	passFrameBuffer = NULL;
	tmpFrameBuffer = NULL;
	frameBuffer = NULL;
	toneMapFrameBuffer = NULL;
	bvhBuffer = NULL;
	gpuTaskBuffer = NULL;
	cameraBuffer = NULL;
	infiniteLightBuffer = NULL;
	matBuffer = NULL;
	matIndexBuffer = NULL;
	texMapBuffer = NULL;
	texMapRGBBuffer = NULL;
	texMapInstanceBuffer = NULL;
	bumpMapInstanceBuffer = NULL;

	AllocOCLBufferRW(&passFrameBuffer, sizeof(Pixel) * width * height, "Pass FrameBuffer");
	AllocOCLBufferRW(&tmpFrameBuffer, sizeof(Pixel) * width * height, "Temporary FrameBuffer");
	if (index == 0) {
		AllocOCLBufferRW(&frameBuffer, sizeof(Pixel) * width * height, "FrameBuffer");
		AllocOCLBufferRW(&toneMapFrameBuffer, sizeof(Pixel) * width * height, "ToneMap FrameBuffer");
	}
	AllocOCLBufferRW(&gpuTaskBuffer, sizeof(ocl_kernels::GPUTask) * width * height, "GPUTask");
	AllocOCLBufferRO(&cameraBuffer, sizeof(compiledscene::Camera), "Camera");
	AllocOCLBufferRO(&infiniteLightBuffer, (void *)(gameLevel.scene->infiniteLight->GetTexture()->GetTexMap()->GetPixels()),
			sizeof(Spectrum) * gameLevel.scene->infiniteLight->GetTexture()->GetTexMap()->GetWidth() *
			gameLevel.scene->infiniteLight->GetTexture()->GetTexMap()->GetHeight(), "Inifinite Light");

	AllocOCLBufferRO(&matBuffer, (void *)(&compiledScene.mats[0]),
			sizeof(compiledscene::Material) * compiledScene.mats.size(), "Materials");
	AllocOCLBufferRO(&matIndexBuffer, (void *)(&compiledScene.sphereMats[0]),
			sizeof(unsigned int) * compiledScene.sphereMats.size(), "Material Indices");

	if (compiledScene.texMaps.size() > 0) {
		AllocOCLBufferRO(&texMapBuffer, (void *)(&compiledScene.texMaps[0]),
				sizeof(compiledscene::TexMap) * compiledScene.texMaps.size(), "Texture Maps");

		AllocOCLBufferRO(&texMapRGBBuffer, (void *)(compiledScene.rgbTexMem),
				sizeof(Spectrum) * compiledScene.totRGBTexMem, "Texture Map Images");

		AllocOCLBufferRO(&texMapInstanceBuffer, (void *)(&compiledScene.sphereTexs[0]),
				sizeof(compiledscene::TexMapInstance) * compiledScene.sphereTexs.size(), "Texture Map Instances");

		if (compiledScene.sphereBumps.size() > 0)
			AllocOCLBufferRO(&bumpMapInstanceBuffer, (void *)(&compiledScene.sphereBumps[0]),
					sizeof(compiledscene::BumpMapInstance) * compiledScene.sphereBumps.size(), "Bump Map Instances");
	}

	SFERA_LOG("[OCLRenderer] Total OpenCL device memory used: " << fixed << setprecision(2) << usedDeviceMemory / (1024 * 1024) << "Mbytes");

	if (index == 0) {
		//--------------------------------------------------------------------------
		// Create pixel buffer object for display
		//--------------------------------------------------------------------------

		glGenBuffersARB(1, &pbo);
		glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
		glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, width * height *
				sizeof(GLubyte) * 4, 0, GL_STREAM_DRAW_ARB);
		glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
		pboBuff = new cl::BufferGL(*ctx, CL_MEM_READ_WRITE, pbo);
	}

	//--------------------------------------------------------------------------
	// Compile the kernel source
	//--------------------------------------------------------------------------

	// Set #define symbols
	stringstream ss;
	ss.precision(6);
	ss << scientific <<
			" -D PARAM_SCREEN_WIDTH=" << width <<
			" -D PARAM_SCREEN_HEIGHT=" << height <<
			" -D PARAM_SCREEN_SAMPLEPERPASS="******" -D PARAM_RAY_EPSILON=" << EPSILON << "f" <<
			" -D PARAM_MAX_DIFFUSE_BOUNCE=" << gameLevel.maxPathDiffuseBounces <<
			" -D PARAM_MAX_SPECULARGLOSSY_BOUNCE=" << gameLevel.maxPathSpecularGlossyBounces <<
			" -D PARAM_IL_SHIFT_U=" << gameLevel.scene->infiniteLight->GetShiftU() << "f" <<
			" -D PARAM_IL_SHIFT_V=" << gameLevel.scene->infiniteLight->GetShiftV() << "f" <<
			" -D PARAM_IL_GAIN_R=" << gameLevel.scene->infiniteLight->GetGain().r << "f" <<
			" -D PARAM_IL_GAIN_G=" << gameLevel.scene->infiniteLight->GetGain().g << "f" <<
			" -D PARAM_IL_GAIN_B=" << gameLevel.scene->infiniteLight->GetGain().b << "f" <<
			" -D PARAM_IL_MAP_WIDTH=" << gameLevel.scene->infiniteLight->GetTexture()->GetTexMap()->GetWidth() <<
			" -D PARAM_IL_MAP_HEIGHT=" << gameLevel.scene->infiniteLight->GetTexture()->GetTexMap()->GetHeight() <<
			" -D PARAM_GAMMA=" << gameLevel.toneMap->GetGamma() << "f" <<
			" -D PARAM_MEM_TYPE=" << gameLevel.gameConfig->GetOpenCLMemType();

	if (compiledScene.enable_MAT_MATTE)
		ss << " -D PARAM_ENABLE_MAT_MATTE";
	if (compiledScene.enable_MAT_MIRROR)
		ss << " -D PARAM_ENABLE_MAT_MIRROR";
	if (compiledScene.enable_MAT_GLASS)
		ss << " -D PARAM_ENABLE_MAT_GLASS";
	if (compiledScene.enable_MAT_METAL)
		ss << " -D PARAM_ENABLE_MAT_METAL";
	if (compiledScene.enable_MAT_ALLOY)
		ss << " -D PARAM_ENABLE_MAT_ALLOY";

	if (texMapBuffer) {
		ss << " -D PARAM_HAS_TEXTUREMAPS";

		if (compiledScene.sphereBumps.size() > 0)
			ss << " -D PARAM_HAS_BUMPMAPS";
	}

	switch (gameLevel.toneMap->GetType()) {
		case TONEMAP_REINHARD02:
			ss << " -D PARAM_TM_LINEAR_SCALE=1.0f";
			break;
		case TONEMAP_LINEAR: {
			LinearToneMap *tm = (LinearToneMap *)gameLevel.toneMap;
			ss << " -D PARAM_TM_LINEAR_SCALE=" << tm->scale << "f";
			break;
		}
		default:
			assert (false);

	}

#if defined(__APPLE__)
	ss << " -D __APPLE__";
#endif

	SFERA_LOG("[OCLRenderer] Defined symbols: " << ss.str());
	SFERA_LOG("[OCLRenderer] Compiling kernels");

	cl::Program::Sources source(1, std::make_pair(KernelSource_kernel_core.c_str(), KernelSource_kernel_core.length()));
	cl::Program program = cl::Program(*ctx, source);
	try {
		VECTOR_CLASS<cl::Device> buildDevice;
		buildDevice.push_back(dev);
		program.build(buildDevice, ss.str().c_str());
	} catch (cl::Error err) {
		cl::STRING_CLASS strError = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(dev);
		SFERA_LOG("[OCLRenderer] Kernel compilation error:\n" << strError.c_str());

		throw err;
	}

	kernelInit = new cl::Kernel(program, "Init");
	kernelInit->setArg(0, *gpuTaskBuffer);
	cmdQueue->enqueueNDRangeKernel(*kernelInit, cl::NullRange,
			cl::NDRange(RoundUp<unsigned int>(width * height, WORKGROUP_SIZE)),
			cl::NDRange(WORKGROUP_SIZE));

	kernelInitFrameBuffer = new cl::Kernel(program, "InitFB");
	if (index == 0) {
		kernelInitFrameBuffer->setArg(0, *frameBuffer);
		cmdQueue->enqueueNDRangeKernel(*kernelInitFrameBuffer, cl::NullRange,
			cl::NDRange(RoundUp<unsigned int>(width * height, WORKGROUP_SIZE)),
			cl::NDRange(WORKGROUP_SIZE));
	}
	kernelInitFrameBuffer->setArg(0, *passFrameBuffer);

	kernelPathTracing = new cl::Kernel(program, "PathTracing");
	unsigned int argIndex = 0;
	kernelPathTracing->setArg(argIndex++, *gpuTaskBuffer);
	argIndex++;
	kernelPathTracing->setArg(argIndex++, *cameraBuffer);
	kernelPathTracing->setArg(argIndex++, *infiniteLightBuffer);
	kernelPathTracing->setArg(argIndex++, *passFrameBuffer);
	kernelPathTracing->setArg(argIndex++, *matBuffer);
	kernelPathTracing->setArg(argIndex++, *matIndexBuffer);
	if (texMapBuffer) {
		kernelPathTracing->setArg(argIndex++, *texMapBuffer);
		kernelPathTracing->setArg(argIndex++, *texMapRGBBuffer);
		kernelPathTracing->setArg(argIndex++, *texMapInstanceBuffer);
		if (compiledScene.sphereBumps.size() > 0)
			kernelPathTracing->setArg(argIndex++, *bumpMapInstanceBuffer);
	}

	kernelApplyBlurLightFilterXR1 = new cl::Kernel(program, "ApplyBlurLightFilterXR1");
	kernelApplyBlurLightFilterXR1->setArg(0, *passFrameBuffer);
	kernelApplyBlurLightFilterXR1->setArg(1, *tmpFrameBuffer);

	kernelApplyBlurLightFilterYR1 = new cl::Kernel(program, "ApplyBlurLightFilterYR1");
	kernelApplyBlurLightFilterYR1->setArg(0, *tmpFrameBuffer);
	kernelApplyBlurLightFilterYR1->setArg(1, *passFrameBuffer);

	kernelApplyBlurHeavyFilterXR1 = new cl::Kernel(program, "ApplyBlurHeavyFilterXR1");
	kernelApplyBlurHeavyFilterXR1->setArg(0, *passFrameBuffer);
	kernelApplyBlurHeavyFilterXR1->setArg(1, *tmpFrameBuffer);

	kernelApplyBlurHeavyFilterYR1 = new cl::Kernel(program, "ApplyBlurHeavyFilterYR1");
	kernelApplyBlurHeavyFilterYR1->setArg(0, *tmpFrameBuffer);
	kernelApplyBlurHeavyFilterYR1->setArg(1, *passFrameBuffer);

	kernelApplyBoxFilterXR1 = new cl::Kernel(program, "ApplyBoxFilterXR1");
	kernelApplyBoxFilterXR1->setArg(0, *passFrameBuffer);
	kernelApplyBoxFilterXR1->setArg(1, *tmpFrameBuffer);

	kernelApplyBoxFilterYR1 = new cl::Kernel(program, "ApplyBoxFilterYR1");
	kernelApplyBoxFilterYR1->setArg(0, *tmpFrameBuffer);
	kernelApplyBoxFilterYR1->setArg(1, *passFrameBuffer);

	if (index == 0) {
		kernelBlendFrame = new cl::Kernel(program, "BlendFrame");
		kernelBlendFrame->setArg(0, *passFrameBuffer);
		kernelBlendFrame->setArg(1, *frameBuffer);

		kernelToneMapLinear = new cl::Kernel(program, "ToneMapLinear");
		kernelToneMapLinear->setArg(0, *frameBuffer);
		kernelToneMapLinear->setArg(1, *toneMapFrameBuffer);

		kernelUpdatePixelBuffer = new cl::Kernel(program, "UpdatePixelBuffer");
		kernelUpdatePixelBuffer->setArg(0, *toneMapFrameBuffer);
		kernelUpdatePixelBuffer->setArg(1, *pboBuff);
	} else {
		kernelBlendFrame = NULL;
		kernelToneMapLinear = NULL;
		kernelUpdatePixelBuffer = NULL;
	}
}
예제 #13
0
RenderDevice::RenderDevice(const cl::Device &device, const string &kernelFileName,
		const unsigned int forceGPUWorkSize,
		Camera *camera, Sphere *spheres, const unsigned int sceneSphereCount/*,
		boost::barrier *startBarrier, boost::barrier *endBarrier*/) :
	/*renderThread(NULL), threadStartBarrier(startBarrier), threadEndBarrier(endBarrier),*/
	sphereCount(sceneSphereCount), colorBuffer(NULL), pixelBuffer(NULL), seedBuffer(NULL),
	pixels(NULL), colors(NULL), seeds(NULL), exeUnitCount(0.0), exeTime(0.0) {
	deviceName = "anonymouse";//device.getInfo<CL_DEVICE_NAME > ().c_str();

	// Allocate a context with the selected device
	cl::Platform platform = device.getInfo<CL_DEVICE_PLATFORM>();
	VECTOR_CLASS<cl::Device> devices;
	devices.push_back(device);
	cl_context_properties cps[3] = {
		CL_CONTEXT_PLATFORM, (cl_context_properties)platform(), 0
	};
	context = new cl::Context(devices, cps);

	// Allocate the queue for this device
	cl_command_queue_properties prop = CL_QUEUE_PROFILING_ENABLE;
	queue = new cl::CommandQueue(*context, device, prop);

	// Create the kernel
	string src = ReadSources(kernelFileName);

	// Compile sources
	cl::Program::Sources source(1, make_pair(src.c_str(), src.length()));
	cl::Program program = cl::Program(*context, source);
	try {
		VECTOR_CLASS<cl::Device> buildDevice;
		buildDevice.push_back(device);
#if defined(__EMSCRIPTEN__)
		program.build(buildDevice, "");
#elif defined(__APPLE__)
		program.build(buildDevice, "-D__APPLE__");
#else
		program.build(buildDevice, "");
#endif
		cl::string result = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
		cerr << "[Device::" << deviceName << "]" << " Compilation result: " << result.c_str() << endl;
	} catch (cl::Error err) {
		cl::string strError = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
		cerr << "[Device::" << deviceName << "]" << " Compilation error:" << endl << strError.c_str() << endl;

		throw err;
	}

	kernel = new cl::Kernel(program, "RadianceGPU");

	kernel->getWorkGroupInfo<size_t>(device, CL_KERNEL_WORK_GROUP_SIZE, &workGroupSize);
	cerr << "[Device::" << deviceName << "]" << " Suggested work group size: " << workGroupSize << endl;

	// Force workgroup size if applicable and required
	if ((forceGPUWorkSize > 0) && (device.getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_GPU)) {
		workGroupSize = forceGPUWorkSize;
		cerr << "[Device::" << deviceName << "]" << " Forced work group size: " << workGroupSize << endl;
	}

	// Create the thread for the rendering
	//renderThread = new boost::thread(boost::bind(RenderDevice::RenderThread, this));

	// Create camera buffer
	cameraBuffer = new cl::Buffer(*context,
#if defined (__APPLE__)
			CL_MEM_READ_ONLY, // CL_MEM_USE_HOST_PTR is very slow with Apple's OpenCL
#else
			CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
#endif
				sizeof(Camera),
				camera);
	cerr << "[Device::" << deviceName << "] Camera buffer size: " << (sizeof(Camera) / 1024) << "Kb" << endl;

	sphereBuffer = new cl::Buffer(*context,
#if defined (__APPLE__)
			CL_MEM_READ_ONLY, // CL_MEM_USE_HOST_PTR is very slow with Apple's OpenCL
#else
			CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
#endif
			sizeof(Sphere) * sphereCount,
			spheres);
	cerr << "[Device::" << deviceName << "] Scene buffer size: " << (sizeof(Sphere) * sphereCount / 1024) << "Kb" << endl;
}
예제 #14
0
int main(int argc, char* argv[]) {
	cl_int retCode;

	// get platforms
	const cl_platform_info PLATFORM_INFOS[] = {
		CL_PLATFORM_NAME,
		CL_PLATFORM_VENDOR,
		CL_PLATFORM_PROFILE,
		CL_PLATFORM_VERSION
	};
	VECTOR_CLASS<Platform> platforms;

	retCode = Platform::get(&platforms);
	if (retCode != CL_SUCCESS)
		die("failed to get platforms");

	std::cout << "platforms found: " << platforms.size() << std::endl;
	for (uint i = 0; i < platforms.size(); i++) {
		std::cout << "platform " << i+1 << ": ";
		for (uint j = 0; j < (sizeof(PLATFORM_INFOS) / sizeof(cl_platform_info)); j++) {
			STRING_CLASS info;
			retCode = platforms[i].getInfo(PLATFORM_INFOS[j], &info);
			if (retCode != CL_SUCCESS)
				die("failed to get platform info");
			std::cout << info << " ";
		}
		std::cout << std::endl;

		// get devices
		const cl_device_info DEVICE_INFOS[] = {
			CL_DEVICE_NAME,
			CL_DEVICE_VENDOR,
			CL_DEVICE_PROFILE,
			CL_DEVICE_VERSION,
			CL_DRIVER_VERSION,
			CL_DEVICE_OPENCL_C_VERSION
		};
		VECTOR_CLASS<Device> devices;

		retCode = platforms[i].getDevices(CL_DEVICE_TYPE_ALL, &devices);
		if (retCode != CL_SUCCESS)
			die("  failed to get devices");

		std::cout << "  devices found: " << devices.size() << std::endl;
		for (uint j = 0; j < devices.size(); j++) {
			std::cout << "  device " << j+1 << ": ";
			for (uint k = 0; k < (sizeof(DEVICE_INFOS) / sizeof(cl_device_info)); k++) {
				STRING_CLASS info;
				retCode = devices[j].getInfo(DEVICE_INFOS[k], &info);
				if (retCode != CL_SUCCESS)
					die("  failed to get device info");
				std::cout << info << " ";
			}

			cl_ulong memSize;
			retCode = devices[j].getInfo(CL_DEVICE_GLOBAL_MEM_SIZE, &memSize);
			if (retCode != CL_SUCCESS)
				die("  failed to get device info");
			std::cout << "GlobalMemSize:" << memSize / 1024 / 1024 << "MB ";

			retCode = devices[j].getInfo(CL_DEVICE_LOCAL_MEM_SIZE, &memSize);
			if (retCode != CL_SUCCESS)
				die("  failed to get device info");
			std::cout << "LocalMemSize:" << memSize / 1024 << "KB ";

			std::cout << std::endl;
		}
	}
}
예제 #15
0
void OCLRendererThread::DrawFrame() {
	const GameLevel &gameLevel(*(renderer->gameLevel));
	const GameConfig &gameConfig(*(gameLevel.gameConfig));
	const unsigned int width = gameConfig.GetScreenWidth();
	const unsigned int height = gameConfig.GetScreenHeight();

	//--------------------------------------------------------------------------
	// Merge all the framebuffers if required
	//--------------------------------------------------------------------------

	const size_t threadCount = renderer->renderThread.size();
	if (threadCount > 1) {
		vector<Pixel *> cpuFrameBuffers(threadCount);
		for (size_t i = 0; i < threadCount; ++i)
				cpuFrameBuffers[i] = renderer->renderThread[i]->cpuFrameBuffer->GetPixels();

		Pixel *dst = cpuFrameBuffers[0];
		for (size_t i = 0; i < width * height; ++i) {
			float r = dst->r;
			float g = dst->g;
			float b = dst->b;

			for (size_t j = 1; j < threadCount; ++j) {
				r += cpuFrameBuffers[j]->r;
				g += cpuFrameBuffers[j]->g;
				b += cpuFrameBuffers[j]->b;

				cpuFrameBuffers[j] += 1;
			}

			dst->r = r;
			dst->g = g;
			dst->b = b;

			++dst;
		}

		cmdQueue->enqueueWriteBuffer(*passFrameBuffer,
					CL_FALSE, 0, sizeof(Pixel) * width * height, cpuFrameBuffers[0]);
	}

	//--------------------------------------------------------------------------
	// Blend the new frame with the old one
	//--------------------------------------------------------------------------

	kernelBlendFrame->setArg(2, renderer->blendFactor);

	cmdQueue->enqueueNDRangeKernel(*kernelBlendFrame, cl::NullRange,
			cl::NDRange(RoundUp<unsigned int>(width * height, WORKGROUP_SIZE)),
			cl::NDRange(WORKGROUP_SIZE));

	//--------------------------------------------------------------------------
	// Tone mapping
	//--------------------------------------------------------------------------

	switch (gameLevel.toneMap->GetType()) {
		case TONEMAP_REINHARD02:
		case TONEMAP_LINEAR:
			cmdQueue->enqueueNDRangeKernel(*kernelToneMapLinear, cl::NullRange,
			cl::NDRange(RoundUp<unsigned int>(width * height, WORKGROUP_SIZE)),
			cl::NDRange(WORKGROUP_SIZE));
			break;
		default:
			assert (false);

	}

	//--------------------------------------------------------------------------
	// Copy the OpenCL frame buffer to OpenGL one
	//--------------------------------------------------------------------------

	VECTOR_CLASS<cl::Memory> buffs;
	buffs.push_back(*pboBuff);
	cmdQueue->enqueueAcquireGLObjects(&buffs);

	cmdQueue->enqueueNDRangeKernel(*kernelUpdatePixelBuffer, cl::NullRange,
			cl::NDRange(RoundUp<unsigned int>(width * height, WORKGROUP_SIZE)),
			cl::NDRange(WORKGROUP_SIZE));

	cmdQueue->enqueueReleaseGLObjects(&buffs);
	cmdQueue->finish();

	// Draw the image on the screen
	glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
    glDrawPixels(width, height, GL_RGBA, GL_UNSIGNED_BYTE, 0);
    glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
}
예제 #16
0
OCLRenderer::OCLRenderer(GameLevel *level) : LevelRenderer(level) {
	compiledScene = new CompiledScene(level);

	timeSinceLastCameraEdit = WallClockTime();
	timeSinceLastNoCameraEdit = timeSinceLastCameraEdit;

	//--------------------------------------------------------------------------
	// OpenCL setup
	//--------------------------------------------------------------------------

	vector<cl::Device> selectedDevices;

	// Scan all platforms and devices available
	VECTOR_CLASS<cl::Platform> platforms;
	cl::Platform::get(&platforms);
	const string &selectString = gameLevel->gameConfig->GetOpenCLDeviceSelect();
	size_t selectIndex = 0;
	for (size_t i = 0; i < platforms.size(); ++i) {
		SFERA_LOG("[OCLRenderer] OpenCL Platform " << i << ": " << platforms[i].getInfo<CL_PLATFORM_VENDOR>());

		// Get the list of devices available on the platform
		VECTOR_CLASS<cl::Device> devices;
		platforms[i].getDevices(CL_DEVICE_TYPE_ALL, &devices);

		for (size_t j = 0; j < devices.size(); ++j) {
			SFERA_LOG("[OCLRenderer]   OpenCL device " << j << ": " << devices[j].getInfo<CL_DEVICE_NAME>());
			SFERA_LOG("[OCLRenderer]     Type: " << OCLDeviceTypeString(devices[j].getInfo<CL_DEVICE_TYPE>()));
			SFERA_LOG("[OCLRenderer]     Units: " << devices[j].getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>());
			SFERA_LOG("[OCLRenderer]     Global memory: " << devices[j].getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>() / 1024 << "Kbytes");
			SFERA_LOG("[OCLRenderer]     Local memory: " << devices[j].getInfo<CL_DEVICE_LOCAL_MEM_SIZE>() / 1024 << "Kbytes");
			SFERA_LOG("[OCLRenderer]     Local memory type: " << OCLLocalMemoryTypeString(devices[j].getInfo<CL_DEVICE_LOCAL_MEM_TYPE>()));
			SFERA_LOG("[OCLRenderer]     Constant memory: " << devices[j].getInfo<CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE>() / 1024 << "Kbytes");

			bool selected = false;
			if (!gameLevel->gameConfig->GetOpenCLUseOnlyGPUs() || (devices[j].getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_GPU)) {
				if (selectString.length() == 0)
					selected = true;
				else {
					if (selectString.length() <= selectIndex)
						throw runtime_error("OpenCL select devices string (opencl.devices.select) has the wrong length");

					if (selectString.at(selectIndex) == '1')
						selected = true;
				}
			}

			if (selected) {
				selectedDevices.push_back(devices[j]);
				SFERA_LOG("[OCLRenderer]     SELECTED");
			} else
				SFERA_LOG("[OCLRenderer]     NOT SELECTED");

			++selectIndex;
		}
	}

	if (selectedDevices.size() == 0)
		throw runtime_error("Unable to find a OpenCL GPU device");

	// Create synchronization barrier
	barrier = new boost::barrier(selectedDevices.size() + 1);

	totSamplePerPass = 0;
	for (size_t i = 0; i < selectedDevices.size(); ++i)
		totSamplePerPass += gameLevel->gameConfig->GetOpenCLDeviceSamplePerPass(i);

	renderThread.resize(selectedDevices.size(), NULL);
	for (size_t i = 0; i < selectedDevices.size(); ++i) {
		OCLRendererThread *rt = new OCLRendererThread(i, this, selectedDevices[i]);
		renderThread[i] = rt;
	}

	for (size_t i = 0; i < renderThread.size(); ++i)
		renderThread[i]->Start();
}
예제 #17
0
bool VNNclAlgorithm::reconstruct(ProcessedUSInputDataPtr input, vtkImageDataPtr outputData, float radius, int nClosePlanes)
{
	mMeasurementNames.clear();

	int numBlocks = 10; // FIXME? needs to be the same as the number of input bscans to the voxel_method kernel

	// Split input US into blocks
	// Splits and copies data from the processed input in the way the kernel will processes it, which is per frameBlock
	frameBlock_t* inputBlocks = new frameBlock_t[numBlocks];
	size_t nPlanes_numberOfInputImages = input->getDimensions()[2];
	this->initializeFrameBlocks(inputBlocks, numBlocks, input);

	// Allocate CL memory for each frame block
	VECTOR_CLASS<cl::Buffer> clBlocks;
	report("Allocating OpenCL input block buffers");
	for (int i = 0; i < numBlocks; i++)
	{
		//TODO why does the context suddenly contain a "dummy" device?
		cl::Buffer buffer = mOulContex->createBuffer(mOulContex->getContext(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, inputBlocks[i].length, inputBlocks[i].data, "block buffer "+QString::number(i).toStdString());
		clBlocks.push_back(buffer);
	}
	// Allocate output memory
	int *outputDims = outputData->GetDimensions();

	size_t outputVolumeSize = outputDims[0] * outputDims[1] * outputDims[2] * sizeof(unsigned char);

	report(QString("Allocating CL output buffer, size %1").arg(outputVolumeSize));

	cl_ulong globalMemUse = 10 * inputBlocks[0].length + outputVolumeSize + sizeof(float) * 16 * nPlanes_numberOfInputImages + sizeof(cl_uchar) * input->getDimensions()[0] * input->getDimensions()[1];
	if(isUsingTooMuchMemory(outputVolumeSize, inputBlocks[0].length, globalMemUse))
		return false;

	cl::Buffer outputBuffer = mOulContex->createBuffer(mOulContex->getContext(), CL_MEM_WRITE_ONLY, outputVolumeSize, NULL, "output volume buffer");

	// Fill the plane matrices
	float *planeMatrices = new float[16 * nPlanes_numberOfInputImages]; //4x4 (matrix) = 16
	this->fillPlaneMatrices(planeMatrices, input);

	cl::Buffer clPlaneMatrices = mOulContex->createBuffer(mOulContex->getContext(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, nPlanes_numberOfInputImages * sizeof(float) * 16, planeMatrices, "plane matrices buffer");

	// US Probe mask
	cl::Buffer clMask = mOulContex->createBuffer(mOulContex->getContext(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
			sizeof(cl_uchar) * input->getMask()->GetDimensions()[0] * input->getMask()->GetDimensions()[1],
			input->getMask()->GetScalarPointer(), "mask buffer");

	double *out_spacing = outputData->GetSpacing();
	float spacings[2];
	float f_out_spacings[3];
	f_out_spacings[0] = out_spacing[0];
	f_out_spacings[1] = out_spacing[1];
	f_out_spacings[2] = out_spacing[2];


	spacings[0] = input->getSpacing()[0];
	spacings[1] = input->getSpacing()[1];

	//TODO why 4? because float4 is used??
	size_t planes_eqs_size =  sizeof(cl_float)*4*nPlanes_numberOfInputImages;

	// Find the optimal local work size
	size_t local_work_size;
	unsigned int deviceNumber = 0;
	cl::Device device = mOulContex->getDevice(deviceNumber);
	mKernel.getWorkGroupInfo(device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, &local_work_size);

	size_t close_planes_size = this->calculateSpaceNeededForClosePlanes(mKernel, device, local_work_size, nPlanes_numberOfInputImages, nClosePlanes);

	this->setKernelArguments(
			mKernel,
			outputDims[0],
			outputDims[1],
			outputDims[2],
			f_out_spacings[0],
			f_out_spacings[1],
			f_out_spacings[2],
			input->getDimensions()[0],
			input->getDimensions()[1],
			spacings[0],
			spacings[1],
			clBlocks,
			outputBuffer,
			clPlaneMatrices,
			clMask,
			planes_eqs_size,
			close_planes_size,
			radius);

	report(QString("Using %1 as local workgroup size").arg(local_work_size));

	// We will divide the work into cubes of CUBE_DIM^3 voxels. The global work size is the total number of voxels divided by that.
	int cube_dim = 4;
	int cube_dim_pow3 = cube_dim * cube_dim * cube_dim;
	// Global work items:
	size_t global_work_size = (((outputDims[0] + cube_dim) * (outputDims[1] + cube_dim) * (outputDims[2] + cube_dim)) / cube_dim_pow3); // = number of cubes = number of kernels to run

	// Round global_work_size up to nearest multiple of local_work_size
	if (global_work_size % local_work_size)
		global_work_size = ((global_work_size / local_work_size) + 1) * local_work_size; // ceil(...)

	unsigned int queueNumber = 0;
	cl::CommandQueue queue = mOulContex->getQueue(queueNumber);
	this->measureAndExecuteKernel(queue, mKernel, global_work_size, local_work_size, mKernelMeasurementName);
	this->measureAndReadBuffer(queue, outputBuffer, outputVolumeSize, outputData->GetScalarPointer(), "vnncl_read_buffer");
	setDeepModified(outputData);
	// Cleaning up
	report(QString("Done, freeing GPU memory"));
	this->freeFrameBlocks(inputBlocks, numBlocks);
	delete[] inputBlocks;

	inputBlocks = NULL;

	return true;
}
예제 #18
0
OCLutil::OCLutil(cl_device_type type,std::string arq,std::string buildOptions,std::string nomeRot,int n)
{

    VECTOR_CLASS<cl::Platform> platforms;
    cl::Platform::get(&platforms);

    if(platforms.size() == 0){
        std::cout<<"No OpenCL platforms were found"<<std::endl;
    }

    int platformID = -1;

    for(unsigned int i = 0; i < platforms.size(); i++) {
        try {
            VECTOR_CLASS<cl::Device> devices;
            platforms[i].getDevices(type, &devices);
            platformID = i;
            break;
        } catch(std::exception e) {
            std::cout<<"Error ao ler plataforma: "<<std::endl;
            continue;
        }
    }


    if(platformID == -1){
        std::cout<<"No compatible OpenCL platform found"<<std::endl;
    }

    cl::Platform platform = platforms[platformID];
    std::cout << "Using platform vendor: " << platform.getInfo<CL_PLATFORM_VENDOR>() << std::endl;


    // Use the preferred platform and create a context
    cl_context_properties cps[] = {
        CL_CONTEXT_PLATFORM,
        (cl_context_properties)(platform)(),
        0
    };

    try {
        context = cl::Context(type, cps);
    } catch(std::exception e) {
        std::cout<<"Failed to create an OpenCL context!"<<std::endl;
    }

    std::string filename = arq;
    std::ifstream sourceFile(filename.c_str());
    if(sourceFile.fail())
        std::cout<<"Failed to open OpenCL source file"<<std::endl;

    std::string sourceCode(
                std::istreambuf_iterator<char>(sourceFile),
                (std::istreambuf_iterator<char>()));
    cl::Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()+1));

    // Make program of the source code in the context
    cl::Program program = cl::Program(context, source);

    VECTOR_CLASS<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
    std::string deviceInfo;
    cl_ulong memInfo;
    size_t tam;
    cl_uint clUnit;
    int indexDev = 0;
    int maxU = 0;
    for (int i = 0; i < devices.size(); ++i)
    {
        devices[i].getInfo((cl_device_info) CL_DEVICE_NAME, &deviceInfo);
        std::cout << "Device info: " << deviceInfo << std::endl;
        devices[i].getInfo((cl_device_info) CL_DEVICE_VERSION, &deviceInfo);
        std::cout << "Versão CL: " << deviceInfo << std::endl;
        devices[i].getInfo((cl_device_info) CL_DRIVER_VERSION, &deviceInfo);
        std::cout << "Versão Driver: " << deviceInfo << std::endl;
        devices[i].getInfo((cl_device_info) CL_DEVICE_GLOBAL_MEM_SIZE, &memInfo);
        std::cout << "Memoria Global: " << memInfo << std::endl;
        devices[i].getInfo((cl_device_info) CL_DEVICE_LOCAL_MEM_SIZE, &memInfo);
        std::cout << "Memoria Local: " << memInfo << std::endl;
        devices[i].getInfo((cl_device_info) CL_DEVICE_LOCAL_MEM_SIZE, &tam);
        std::cout << "Max tamanho Work-group: " << tam << std::endl;
        devices[i].getInfo((cl_device_info) CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, &clUnit);
        std::cout << "Max dimensao: " << clUnit << std::endl;
        devices[i].getInfo((cl_device_info) CL_DEVICE_MAX_COMPUTE_UNITS, &clUnit);
        std::cout << "Unidades CL: " << clUnit << std::endl;
        std::cout << "*********************************" << std::endl;

        if((int)clUnit>maxU){
            indexDev = i;
            maxU = (int)clUnit;
        }
    }

    // Build program for these specific devices
    cl_int error = program.build(devices, buildOptions.c_str());
    if(error != 0) {
        std::cout << "Build log:" << std::endl << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]) << std::endl;
    }

    std::cout << "Index Dispositino selecionado: " << indexDev << std::endl;
    queue = cl::CommandQueue(context, devices[indexDev]);

    int posi = 0;
    int posf = 0;
    for(int i = 0; i < n; i++){

        posf = nomeRot.find(",",posi);
        std::string nomeRoti;
        if(posf != -1){
            nomeRoti = nomeRot.substr(posi,posf-posi);
        }else{
            nomeRoti = nomeRot.substr(posi);
        }
        std::cout<<"Nome rotina["<<i<<"]: "<<nomeRoti.data()<<std::endl;
        rotina.push_back(cl::Kernel(program, nomeRoti.data()));
        posi = posf + 1;
    }
}