/// Refines the primvar \p name held in \p range on the GPU by evaluating the
/// vertex stencil table through an OpenSubdiv GPU evaluator.
///
/// The coarse values are read starting at the range's offset and the refined
/// values are written right after the coarse vertices, into the same buffer.
/// When HDST_ENABLE_GPU_SUBDIVISION is not set, a coding error is issued and
/// nothing is computed.
///
/// \param range  buffer array range that owns the resource to refine.
/// \param name   name of the resource inside \p range.
/*virtual*/
void
HdSt_Osd3Subdivision::RefineGPU(HdBufferArrayRangeSharedPtr const &range,
                                TfToken const &name)
{
#if HDST_ENABLE_GPU_SUBDIVISION
    if (!TF_VERIFY(_vertexStencils)) return;

    // filling coarse vertices has been done at resource registry.

    // vertex buffer wrapper for OpenSubdiv API
    HdSt_OsdRefineComputationGPU::VertexBuffer vertexBuffer(
        range->GetResource(name));

    // vertex buffer is not interleaved, but aggregated.
    // we need an offset to locate the current range.
    int stride = vertexBuffer.GetNumElements();
    int numCoarseVertices = _vertexStencils->GetNumControlVertices();

    // Source: the coarse vertices at the beginning of this range.
    OpenSubdiv::Osd::BufferDescriptor srcDesc(
        /*offset=*/range->GetOffset() * stride,
        /*length=*/stride,
        /*stride=*/stride);
    // Destination: the slots following the coarse vertices.
    OpenSubdiv::Osd::BufferDescriptor dstDesc(
        /*offset=*/(range->GetOffset() + numCoarseVertices) * stride,
        /*length=*/stride,
        /*stride=*/stride);

    // GPU evaluator can be static, as long as it's called sequentially.
    static OpenSubdiv::Osd::EvaluatorCacheT<HdSt_OsdGpuEvaluator>
        evaluatorCache;

    HdSt_OsdGpuEvaluator const *instance =
        OpenSubdiv::Osd::GetEvaluator<HdSt_OsdGpuEvaluator>(
            &evaluatorCache, srcDesc, dstDesc,
            static_cast<void*>(nullptr) /*deviceContext*/);

    // The cache hands back a null evaluator when the kernel could not be
    // created; report it instead of dereferencing a null pointer.
    if (!TF_VERIFY(instance)) return;

    instance->EvalStencils(&vertexBuffer, srcDesc,
                           &vertexBuffer, dstDesc,
                           _GetGpuStencilTable());
#else
    TF_CODING_ERROR("No GPU kernel available.\n");
#endif
}
/// Dispatches the smooth-normals compute program for \p range.
///
/// The program is selected from the (_srcDataType, _dstDataType) pair. The
/// resource named _srcName is bound as shader storage buffer 0, the resource
/// named _dstName as buffer 1 and the adjacency table as buffer 2; offsets
/// and strides are passed through the program's global uniform buffer.
/// One work group is dispatched per point.
///
/// Returns without doing anything when compute dispatch is unavailable, when
/// the adjacency data is missing, when no shader matches the data types, or
/// when the compute program cannot be obtained.
///
/// \param range  buffer array range holding the source and destination
///               resources.
void
Hd_SmoothNormalsComputationGPU::Execute(
    HdBufferArrayRangeSharedPtr const &range)
{
    HD_TRACE_FUNCTION();
    HF_MALLOC_TAG_FUNCTION();

    if (!glDispatchCompute)
        return;

    // The adjacency table is dereferenced below; stop here if it (or its
    // range) is missing instead of only reporting the failed verify.
    if (!TF_VERIFY(_adjacency))
        return;

    HdBufferArrayRangeSharedPtr const &adjacencyRange =
        _adjacency->GetAdjacencyRange();
    if (!TF_VERIFY(adjacencyRange))
        return;

    // select shader by datatype
    TfToken shaderToken;
    if (_srcDataType == GL_FLOAT) {
        if (_dstDataType == GL_FLOAT) {
            shaderToken = HdGLSLProgramTokens->smoothNormalsFloatToFloat;
        } else if (_dstDataType == GL_DOUBLE) {
            shaderToken = HdGLSLProgramTokens->smoothNormalsFloatToDouble;
        } else if (_dstDataType == GL_INT_2_10_10_10_REV) {
            shaderToken = HdGLSLProgramTokens->smoothNormalsFloatToPacked;
        }
    } else if (_srcDataType == GL_DOUBLE) {
        if (_dstDataType == GL_FLOAT) {
            shaderToken = HdGLSLProgramTokens->smoothNormalsDoubleToFloat;
        } else if (_dstDataType == GL_DOUBLE) {
            shaderToken = HdGLSLProgramTokens->smoothNormalsDoubleToDouble;
        } else if (_dstDataType == GL_INT_2_10_10_10_REV) {
            shaderToken = HdGLSLProgramTokens->smoothNormalsDoubleToPacked;
        }
    }
    // Unsupported type combination leaves the token empty.
    if (!TF_VERIFY(!shaderToken.IsEmpty()))
        return;

    HdGLSLProgramSharedPtr computeProgram =
        HdGLSLProgram::GetComputeProgram(shaderToken);
    if (!computeProgram)
        return;

    GLuint program = computeProgram->GetProgram().GetId();

    // buffer resources for GPU computation
    HdBufferResourceSharedPtr points = range->GetResource(_srcName);
    HdBufferResourceSharedPtr normals = range->GetResource(_dstName);
    HdBufferResourceSharedPtr adjacency = adjacencyRange->GetResource();

    // prepare uniform buffer for GPU computation
    struct Uniform {
        int vertexOffset;
        int adjacencyOffset;
        int pointsOffset;
        int pointsStride;
        int normalsOffset;
        int normalsStride;
    } uniform;

    // coherent vertex offset in aggregated buffer array
    uniform.vertexOffset = range->GetOffset();
    // adjacency offset/stride in aggregated adjacency table
    uniform.adjacencyOffset = adjacencyRange->GetOffset();
    // interleaved offset/stride to points
    // note: this code (and the glsl smooth normal compute shader) assumes
    // components in interleaved vertex array are always same data type.
    // i.e. it can't handle an interleaved array which interleaves
    // float/double, float/int etc.
    uniform.pointsOffset = points->GetOffset() / points->GetComponentSize();
    uniform.pointsStride = points->GetStride() / points->GetComponentSize();
    // interleaved offset/stride to normals
    uniform.normalsOffset = normals->GetOffset() / normals->GetComponentSize();
    uniform.normalsStride = normals->GetStride() / normals->GetComponentSize();

    // The number of points is based off the size of the output,
    // However, the number of points in the adjacency table
    // is computed based off the largest vertex indexed from
    // the topology (aka topology->ComputeNumPoints).
    //
    // Therefore, we need to clamp the number of points
    // to the number of entries in the adjacency table.
    int numDestPoints = range->GetNumElements();
    int numSrcPoints = _adjacency->GetNumPoints();
    int numPoints = std::min(numSrcPoints, numDestPoints);

    // transfer uniform buffer
    GLuint ubo = computeProgram->GetGlobalUniformBuffer().GetId();
    HdRenderContextCaps const &caps = HdRenderContextCaps::GetInstance();
    // XXX: workaround for 319.xx driver bug of glNamedBufferDataEXT on UBO
    // XXX: move this workaround to renderContextCaps
    // (the `false &&` deliberately disables the direct-state-access path)
    if (false && caps.directStateAccessEnabled) {
        glNamedBufferDataEXT(ubo, sizeof(uniform), &uniform, GL_STATIC_DRAW);
    } else {
        glBindBuffer(GL_UNIFORM_BUFFER, ubo);
        glBufferData(GL_UNIFORM_BUFFER, sizeof(uniform), &uniform,
                     GL_STATIC_DRAW);
        glBindBuffer(GL_UNIFORM_BUFFER, 0);
    }

    glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, points->GetId());
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, normals->GetId());
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, adjacency->GetId());

    // dispatch compute kernel
    glUseProgram(program);

    glDispatchCompute(numPoints, 1, 1);

    glUseProgram(0);

    // Issue a shader-storage barrier after the dispatch.
    glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);

    // Unbind everything that was bound for the dispatch.
    glBindBufferBase(GL_UNIFORM_BUFFER, 0, 0);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, 0);
}
/// Dispatches the smooth-normals compute program for \p range.
///
/// The float or double program is chosen from _dstDataType. The resource
/// named _srcName is bound as shader storage buffer 0, the resource named
/// _dstName as buffer 1 and the adjacency table as buffer 2; offsets and
/// strides are passed through the program's global uniform buffer. One work
/// group is dispatched per element of \p range. The GL program that was
/// current on entry is restored before returning from the dispatch path.
///
/// Returns without dispatching when compute dispatch is unavailable, when
/// the adjacency data is missing, or when the compute program cannot be
/// obtained.
///
/// \param range  buffer array range holding the source and destination
///               resources.
void
Hd_SmoothNormalsComputationGPU::Execute(
    HdBufferArrayRangeSharedPtr const &range)
{
    HD_TRACE_FUNCTION();
    HD_MALLOC_TAG_FUNCTION();

    if (not glDispatchCompute)
        return;

    // XXX: workaround until the shading stuff is implemented.
    // The drawing program is owned and set by testHdBasicDrawing now,
    // so it has to be restored if it's changed in hd.
    GLint restoreProgram = 0;
    glGetIntegerv(GL_CURRENT_PROGRAM, &restoreProgram);

    int numPoints = range->GetNumElements();

    // The adjacency table is dereferenced below; stop here if it (or its
    // range) is missing instead of only reporting the failed verify.
    if (not TF_VERIFY(_adjacency))
        return;

    HdBufferArrayRangeSharedPtr const &adjacencyRange =
        _adjacency->GetAdjacencyRange();
    if (not TF_VERIFY(adjacencyRange))
        return;

    // select shader by datatype
    TfToken shaderToken = (_dstDataType == GL_FLOAT
                               ? HdGLSLProgramTokens->smoothNormalsFloat
                               : HdGLSLProgramTokens->smoothNormalsDouble);

    HdGLSLProgramSharedPtr computeProgram =
        HdGLSLProgram::GetComputeProgram(shaderToken);
    if (not computeProgram)
        return;

    GLuint program = computeProgram->GetProgram().GetId();

    // buffer resources for GPU computation
    HdBufferResourceSharedPtr points = range->GetResource(_srcName);
    HdBufferResourceSharedPtr normals = range->GetResource(_dstName);
    HdBufferResourceSharedPtr adjacency = adjacencyRange->GetResource();

    // prepare uniform buffer for GPU computation
    struct Uniform {
        int vertexOffset;
        int adjacencyStride;
        int adjacencyOffset;
        int padding;
        int pointsOffset;
        int pointsStride;
        int normalsOffset;
        int normalsStride;
    } uniform;

    // coherent vertex offset in aggregated buffer array
    uniform.vertexOffset = range->GetOffset();
    // adjacency offset/stride in aggregated adjacency table
    uniform.adjacencyStride = _adjacency->GetStride();
    uniform.adjacencyOffset = adjacencyRange->GetOffset();
    uniform.padding = 0;
    // interleaved offset/stride to points
    // note: this code (and the glsl smooth normal compute shader) assumes
    // components in interleaved vertex array are always same data type.
    // i.e. it can't handle an interleaved array which interleaves
    // float/double, float/int etc.
    uniform.pointsOffset = points->GetOffset() / points->GetComponentSize();
    uniform.pointsStride = points->GetStride() / points->GetComponentSize();
    // interleaved offset/stride to normals: expressed in units of the
    // normals' own component size, not the points' component size.
    uniform.normalsOffset = normals->GetOffset() / normals->GetComponentSize();
    uniform.normalsStride = normals->GetStride() / normals->GetComponentSize();

    // transfer uniform buffer
    GLuint ubo = computeProgram->GetGlobalUniformBuffer().GetId();
    HdRenderContextCaps const &caps = HdRenderContextCaps::GetInstance();
    // XXX: workaround for 319.xx driver bug of glNamedBufferDataEXT on UBO
    // XXX: move this workaround to renderContextCaps
    // (the `false and` deliberately disables the direct-state-access path)
    if (false and caps.directStateAccessEnabled) {
        glNamedBufferDataEXT(ubo, sizeof(uniform), &uniform, GL_STATIC_DRAW);
    } else {
        glBindBuffer(GL_UNIFORM_BUFFER, ubo);
        glBufferData(GL_UNIFORM_BUFFER, sizeof(uniform), &uniform,
                     GL_STATIC_DRAW);
        glBindBuffer(GL_UNIFORM_BUFFER, 0);
    }

    glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, points->GetId());
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, normals->GetId());
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, adjacency->GetId());

    // dispatch compute kernel
    glUseProgram(program);

    glDispatchCompute(numPoints, 1, 1);

    glUseProgram(0);

    // NOTE(review): no memory barrier is issued after the dispatch here;
    // confirm whether consumers of the normals buffer need one.

    // Unbind everything that was bound for the dispatch.
    glBindBufferBase(GL_UNIFORM_BUFFER, 0, 0);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, 0);

    // XXX: workaround until shading stuff implemented.
    glUseProgram(restoreProgram);
}