void OpenCLBVHKernel::FreeBuffers() { delete kernel; kernel = NULL; OpenCLDeviceDescription *deviceDesc = device->GetDeviceDesc(); deviceDesc->FreeMemory(vertsBuff->getInfo<CL_MEM_SIZE>()); delete vertsBuff; vertsBuff = NULL; deviceDesc->FreeMemory(trisBuff->getInfo<CL_MEM_SIZE>()); delete trisBuff; trisBuff = NULL; deviceDesc->FreeMemory(bvhBuff->getInfo<CL_MEM_SIZE>()); delete bvhBuff; bvhBuff = NULL; }
/// Creates an OpenCLBVHKernel for the given intersection device and uploads
/// the accelerator data to it.
///
/// Three read-only device buffers are created with CL_MEM_COPY_HOST_PTR:
/// the preprocessed mesh vertices, the preprocessed mesh triangles and the
/// BVH node array. The size of each buffer is logged, and its CL_MEM_SIZE is
/// reported to the device description through AllocMemory().
///
/// @param dev                  device the kernel and buffers are created for
/// @param stackSize            not used by this implementation
/// @param disableImageStorage  not used by this implementation
/// @return the new kernel, with the three buffers handed over via SetBuffers()
OpenCLKernel *BVHAccel::NewOpenCLKernel(OpenCLIntersectionDevice *dev,
		unsigned int stackSize, bool disableImageStorage) const {
	OpenCLBVHKernel *bvhKernel = new OpenCLBVHKernel(dev);

	const Context *deviceContext = dev->GetContext();
	cl::Context &oclContext = dev->GetOpenCLContext();
	const std::string &deviceName(dev->GetName());
	OpenCLDeviceDescription *deviceDesc = dev->GetDeviceDesc();

	// Vertices
	const size_t vertsBytes = sizeof(Point) * preprocessedMesh->GetTotalVertexCount();
	LR_LOG(deviceContext, "[OpenCL device::" << deviceName <<
			"] Vertices buffer size: " << (vertsBytes / 1024) << "Kbytes");
	cl::Buffer *vertsBuff = new cl::Buffer(oclContext,
			CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
			vertsBytes,
			preprocessedMesh->GetVertices());
	deviceDesc->AllocMemory(vertsBuff->getInfo<CL_MEM_SIZE>());

	// Triangle indices
	const size_t trisBytes = sizeof(Triangle) * preprocessedMesh->GetTotalTriangleCount();
	LR_LOG(deviceContext, "[OpenCL device::" << deviceName <<
			"] Triangle indices buffer size: " << (trisBytes / 1024) << "Kbytes");
	cl::Buffer *trisBuff = new cl::Buffer(oclContext,
			CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
			trisBytes,
			preprocessedMesh->GetTriangles());
	deviceDesc->AllocMemory(trisBuff->getInfo<CL_MEM_SIZE>());

	// BVH nodes
	const size_t bvhBytes = sizeof(BVHAccelArrayNode) * nNodes;
	LR_LOG(deviceContext, "[OpenCL device::" << deviceName <<
			"] BVH buffer size: " << (bvhBytes / 1024) << "Kbytes");
	cl::Buffer *bvhBuff = new cl::Buffer(oclContext,
			CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
			bvhBytes,
			(void*)bvhTree);
	deviceDesc->AllocMemory(bvhBuff->getInfo<CL_MEM_SIZE>());

	// Hand the buffers over to the kernel wrapper
	bvhKernel->SetBuffers(vertsBuff, preprocessedMesh->GetTotalTriangleCount(),
			trisBuff, nNodes, bvhBuff);

	return bvhKernel;
}
/// Builds the list of OpenCL device descriptions this render engine will use.
///
/// When OpenCL support is compiled in, the constructor reads the
/// "opencl.*" properties from the render configuration and fills
/// selectedDeviceDescs:
///  - if "opencl.devices.select" is non-empty it must contain exactly one
///    character per available OpenCL device; a device is selected when its
///    character is '1';
///  - otherwise CPU and GPU devices are selected according to
///    "opencl.cpu.use" and "opencl.gpu.use".
/// Selected GPU/CPU devices get the work group size from
/// "opencl.gpu.workgroup.size" / "opencl.cpu.workgroup.size".
///
/// @param fatal  when true, an empty selection raises runtime_error
/// @throws runtime_error if the selection string has the wrong length, or if
///         fatal is set and no device was selected
OCLRenderEngine::OCLRenderEngine(RenderConfig *rcfg, Film *flm,
		boost::mutex *flmMutex, bool fatal) :
		RenderEngine(rcfg, flm, flmMutex) {
#if !defined(LUXRAYS_DISABLE_OPENCL)
	const Properties &cfg = renderConfig->cfg;

	const bool useCPUs = (cfg.GetInt("opencl.cpu.use", 1) != 0);
	const bool useGPUs = (cfg.GetInt("opencl.gpu.use", 1) != 0);
	const u_int forceGPUWorkSize = cfg.GetInt("opencl.gpu.workgroup.size", 64);
	const u_int forceCPUWorkSize = cfg.GetInt("opencl.cpu.workgroup.size", 1);
	const string oclDeviceConfig = cfg.GetString("opencl.devices.select", "");

	// Start OpenCL devices
	std::vector<DeviceDescription *> descs = ctx->GetAvailableDeviceDescriptions();
	DeviceDescription::Filter(DEVICE_TYPE_OPENCL_ALL, descs);

	// Device info
	const bool haveSelectionString = !oclDeviceConfig.empty();
	if (haveSelectionString && (oclDeviceConfig.length() != descs.size())) {
		stringstream ss;
		ss << "OpenCL device selection string has the wrong length, must be " <<
				descs.size() << " instead of " << oclDeviceConfig.length();
		throw runtime_error(ss.str().c_str());
	}

	for (size_t i = 0; i < descs.size(); ++i) {
		OpenCLDeviceDescription *desc = static_cast<OpenCLDeviceDescription *>(descs[i]);

		// Decide whether this device takes part in the rendering
		bool selected;
		if (haveSelectionString)
			selected = (oclDeviceConfig.at(i) == '1');
		else
			selected = (useCPUs && desc->GetType() == DEVICE_TYPE_OPENCL_CPU) ||
					(useGPUs && desc->GetType() == DEVICE_TYPE_OPENCL_GPU);

		if (!selected)
			continue;

		// Only GPU and CPU devices get a forced work group size
		if (desc->GetType() == DEVICE_TYPE_OPENCL_GPU)
			desc->SetForceWorkGroupSize(forceGPUWorkSize);
		else if (desc->GetType() == DEVICE_TYPE_OPENCL_CPU)
			desc->SetForceWorkGroupSize(forceCPUWorkSize);

		selectedDeviceDescs.push_back(desc);
	}
#endif

	if (fatal && selectedDeviceDescs.empty())
		throw runtime_error("No OpenCL device selected or available");
}
void RenderingConfig::SetUpOpenCLDevices(const bool useCPUs, const bool useGPUs, const unsigned int forceGPUWorkSize, const unsigned int oclDeviceThreads, const string &oclDeviceConfig) { std::vector<DeviceDescription *> descs = ctx->GetAvailableDeviceDescriptions(); DeviceDescription::Filter(DEVICE_TYPE_OPENCL, descs); // Device info bool haveSelectionString = (oclDeviceConfig.length() > 0); if (haveSelectionString && (oclDeviceConfig.length() != descs.size())) { stringstream ss; ss << "OpenCL device selection string has the wrong length, must be " << descs.size() << " instead of " << oclDeviceConfig.length(); throw runtime_error(ss.str().c_str()); } std::vector<DeviceDescription *> selectedDescs; #if !defined(LUXRAYS_DISABLE_OPENCL) for (size_t i = 0; i < descs.size(); ++i) { OpenCLDeviceDescription *desc = (OpenCLDeviceDescription *)descs[i]; if (haveSelectionString) { if (oclDeviceConfig.at(i) == '1') { if (desc->GetOpenCLType() == OCL_DEVICE_TYPE_GPU) desc->SetForceWorkGroupSize(forceGPUWorkSize); selectedDescs.push_back(desc); } } else { if ((useCPUs && desc->GetOpenCLType() == OCL_DEVICE_TYPE_CPU) || (useGPUs && desc->GetOpenCLType() == OCL_DEVICE_TYPE_GPU)) { if (desc->GetOpenCLType() == OCL_DEVICE_TYPE_GPU) desc->SetForceWorkGroupSize(forceGPUWorkSize); selectedDescs.push_back(descs[i]); } } } #endif if (selectedDescs.size() == 0) cerr << "No OpenCL device selected" << endl; else { #if !defined(LUXRAYS_DISABLE_OPENCL) if (cfg.GetInt("opencl.latency.mode", 1) && (cfg.GetInt("renderengine.type", 0) == 3) && (cfg.GetInt("pathgpu.openglinterop.enable", 0) != 0)) { // Ask for OpenGL interoperability on the first device ((OpenCLDeviceDescription *)selectedDescs[0])->EnableOGLInterop(); } #endif // Allocate devices const size_t gpuRenderThreadCount = (oclDeviceThreads < 1) ? 
(2 * selectedDescs.size()) : oclDeviceThreads; if ((gpuRenderThreadCount == 1) && (selectedDescs.size() == 1)) { // Optimize the special case of one render thread and one GPU intersectionGPUDevices = ctx->AddIntersectionDevices(selectedDescs); } else if ((gpuRenderThreadCount > 1) && (selectedDescs.size() == 1)) { // Optimize the special case of many render thread and one GPU intersectionGPUDevices = ctx->AddVirtualM2OIntersectionDevices(gpuRenderThreadCount, selectedDescs); } else { // Create and start the virtual devices (only if there is more than one GPUs) intersectionGPUDevices = ctx->AddVirtualM2MIntersectionDevices(gpuRenderThreadCount, selectedDescs); } cerr << "OpenCL Devices used: "; for (size_t i = 0; i < intersectionGPUDevices.size(); ++i) cerr << "[" << intersectionGPUDevices[i]->GetName() << "]"; cerr << endl; } }