void Processor::Impl::getGpuLut3D(float* lut3d, const GpuShaderDesc & shaderDesc) const { if(!lut3d) return; AutoMutex lock(m_resultsCacheMutex); if(m_lastShaderDesc != shaderDesc.getCacheID()) { m_lastShaderDesc = shaderDesc.getCacheID(); m_shader = ""; m_shaderCacheID = ""; m_lut3D.clear(); m_lut3DCacheID = ""; } int lut3DEdgeLen = shaderDesc.getLut3DEdgeLen(); int lut3DNumPixels = lut3DEdgeLen*lut3DEdgeLen*lut3DEdgeLen; // Can we write the entire shader using only shader text? // If so, the lut3D is not needed so clear it. // This is preferable to identity, as it lets people notice if // it's accidentally being used. if(m_gpuOpsCpuLatticeProcess.empty()) { memset(lut3d, 0, sizeof(float) * 3 * lut3DNumPixels); return; } if(m_lut3D.empty()) { // Allocate 3dlut image, RGBA m_lut3D.resize(lut3DNumPixels*4); GenerateIdentityLut3D(&m_lut3D[0], lut3DEdgeLen, 4); // Apply the lattice ops to it for(int i=0; i<(int)m_gpuOpsCpuLatticeProcess.size(); ++i) { m_gpuOpsCpuLatticeProcess[i]->apply(&m_lut3D[0], lut3DNumPixels); } // Convert the RGBA image to an RGB image, in place. // Of course, this only works because we're doing it from left to right // so old pixels are read before they're written over // TODO: is this bad for memory access patterns? // see if this is faster with a 2nd temp float array for(int i=1; i<lut3DNumPixels; ++i) // skip the 1st pixel, it's ok. { m_lut3D[3*i+0] = m_lut3D[4*i+0]; m_lut3D[3*i+1] = m_lut3D[4*i+1]; m_lut3D[3*i+2] = m_lut3D[4*i+2]; } } // Copy to the destination memcpy(lut3d, &m_lut3D[0], sizeof(float) * 3 * lut3DNumPixels); }
void ExponentOp::writeGpuShader(std::ostream & shader, const std::string & pixelName, const GpuShaderDesc & shaderDesc) const { GpuLanguage lang = shaderDesc.getLanguage(); float zerovec[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; shader << pixelName << " = pow("; shader << "max(" << pixelName << ", " << GpuTextHalf4(zerovec, lang) << ")"; shader << ", " << GpuTextHalf4(m_exp4, lang) << ");\n"; }
void Processor::Impl::calcGpuShaderText(std::ostream & shader, const GpuShaderDesc & shaderDesc) const { std::string pixelName = "out_pixel"; std::string lut3dName = "lut3d"; WriteShaderHeader(shader, pixelName, shaderDesc); for(unsigned int i=0; i<m_gpuOpsHwPreProcess.size(); ++i) { m_gpuOpsHwPreProcess[i]->writeGpuShader(shader, pixelName, shaderDesc); } if(!m_gpuOpsCpuLatticeProcess.empty()) { // Sample the 3D LUT. int lut3DEdgeLen = shaderDesc.getLut3DEdgeLen(); shader << pixelName << ".rgb = "; Write_sampleLut3D_rgb(shader, pixelName, lut3dName, lut3DEdgeLen, shaderDesc.getLanguage()); } #ifdef __APPLE__ else { // Force a no-op sampling of the 3d lut on OSX to work around a segfault. int lut3DEdgeLen = shaderDesc.getLut3DEdgeLen(); shader << "// OSX segfault work-around: Force a no-op sampling of the 3d lut.\n"; Write_sampleLut3D_rgb(shader, pixelName, lut3dName, lut3DEdgeLen, shaderDesc.getLanguage()); } #endif // __APPLE__ for(unsigned int i=0; i<m_gpuOpsHwPostProcess.size(); ++i) { m_gpuOpsHwPostProcess[i]->writeGpuShader(shader, pixelName, shaderDesc); } WriteShaderFooter(shader, pixelName, shaderDesc); }