Exemplo n.º 1
0
/*virtual*/
void
HdSt_Osd3Subdivision::RefineGPU(HdBufferArrayRangeSharedPtr const &range,
                              TfToken const &name)
{
#if HDST_ENABLE_GPU_SUBDIVISION
    if (!TF_VERIFY(_vertexStencils)) return;

    // filling coarse vertices has been done at resource registry.

    // vertex buffer wrapper for OpenSubdiv API
    HdSt_OsdRefineComputationGPU::VertexBuffer vertexBuffer(
        range->GetResource(name));

    // vertex buffer is not interleaved, but aggregated.
    // we need an offset to locate the current range.
    int stride = vertexBuffer.GetNumElements();
    int numCoarseVertices = _vertexStencils->GetNumControlVertices();

    OpenSubdiv::Osd::BufferDescriptor srcDesc(
        /*offset=*/range->GetOffset() * stride,
        /*length=*/stride,
        /*stride=*/stride);
    OpenSubdiv::Osd::BufferDescriptor dstDesc(
        /*offset=*/(range->GetOffset() + numCoarseVertices) * stride,
        /*length=*/stride,
        /*stride=*/stride);

    // GPU evaluator can be static, as long as it's called sequentially.
    static OpenSubdiv::Osd::EvaluatorCacheT<HdSt_OsdGpuEvaluator> evaluatorCache;

    HdSt_OsdGpuEvaluator const *instance =
        OpenSubdiv::Osd::GetEvaluator<HdSt_OsdGpuEvaluator>(
            &evaluatorCache, srcDesc, dstDesc, (void*)NULL /*deviceContext*/);

    instance->EvalStencils(&vertexBuffer, srcDesc,
                           &vertexBuffer, dstDesc,
                           _GetGpuStencilTable());
#else
    TF_CODING_ERROR("No GPU kernel available.\n");
#endif
}
Exemplo n.º 2
0
bool
HdBufferArray::TryAssignRange(HdBufferArrayRangeSharedPtr &range)
{
    // Garbage collection should make sure range list is
    // contiguous, so we only ever need to insert at end.
    size_t allocIdx = _rangeCount++;

    if (allocIdx >= _maxNumRanges) {
        // Make sure out range count remains clamp at _maxNumRanges.
        // It's ok if multiple threads race to set this to the same value
        // (other than the cache line bouncing)
        _rangeCount.store(_maxNumRanges);

        return false;
    }

     size_t newSize = allocIdx + 1;

    // As we might grow the array (which would result in a copy)
    // we need to lock around the whole insert into _rangeList.
    //
    // An optomisation could be to change into a read/write lock.
    {
        std::lock_guard<std::mutex> lock(_rangeListLock);

        if (newSize > _rangeList.size()) {
            _rangeList.resize(newSize);
        }
        _rangeList[allocIdx] = range;
    }

    range->SetBufferArray(this);
    
    _needsReallocation = true;  // Multiple threads may try to set this to true at once, which is ok.

    return true;
}
Exemplo n.º 3
0
void
Hd_SmoothNormalsComputationGPU::Execute(
    HdBufferArrayRangeSharedPtr const &range)
{
    HD_TRACE_FUNCTION();
    HF_MALLOC_TAG_FUNCTION();

    if (!glDispatchCompute)
        return;


    TF_VERIFY(_adjacency);
    HdBufferArrayRangeSharedPtr const &adjacencyRange = _adjacency->GetAdjacencyRange();
    TF_VERIFY(adjacencyRange);

    // select shader by datatype
    TfToken shaderToken;
    if (_srcDataType == GL_FLOAT) {
        if (_dstDataType == GL_FLOAT) {
            shaderToken = HdGLSLProgramTokens->smoothNormalsFloatToFloat;
        } else if (_dstDataType == GL_DOUBLE) {
            shaderToken = HdGLSLProgramTokens->smoothNormalsFloatToDouble;
        } else if (_dstDataType == GL_INT_2_10_10_10_REV) {
            shaderToken = HdGLSLProgramTokens->smoothNormalsFloatToPacked;
        }
    } else if (_srcDataType == GL_DOUBLE) {
        if (_dstDataType == GL_FLOAT) {
            shaderToken = HdGLSLProgramTokens->smoothNormalsDoubleToFloat;
        } else if (_dstDataType == GL_DOUBLE) {
            shaderToken = HdGLSLProgramTokens->smoothNormalsDoubleToDouble;
        } else if (_dstDataType == GL_INT_2_10_10_10_REV) {
            shaderToken = HdGLSLProgramTokens->smoothNormalsDoubleToPacked;
        }
    }
    if (!TF_VERIFY(!shaderToken.IsEmpty())) return;

    HdGLSLProgramSharedPtr computeProgram
        = HdGLSLProgram::GetComputeProgram(shaderToken);
    if (!computeProgram) return;

    GLuint program = computeProgram->GetProgram().GetId();

    // buffer resources for GPU computation
    HdBufferResourceSharedPtr points = range->GetResource(_srcName);
    HdBufferResourceSharedPtr normals = range->GetResource(_dstName);
    HdBufferResourceSharedPtr adjacency = adjacencyRange->GetResource();

    // prepare uniform buffer for GPU computation
    struct Uniform {
        int vertexOffset;
        int adjacencyOffset;
        int pointsOffset;
        int pointsStride;
        int normalsOffset;
        int normalsStride;
    } uniform;

    // coherent vertex offset in aggregated buffer array
    uniform.vertexOffset = range->GetOffset();
    // adjacency offset/stride in aggregated adjacency table
    uniform.adjacencyOffset = adjacencyRange->GetOffset();
    // interleaved offset/stride to points
    // note: this code (and the glsl smooth normal compute shader) assumes
    // components in interleaved vertex array are always same data type.
    // i.e. it can't handle an interleaved array which interleaves
    // float/double, float/int etc.
    uniform.pointsOffset = points->GetOffset() / points->GetComponentSize();
    uniform.pointsStride = points->GetStride() / points->GetComponentSize();
    // interleaved offset/stride to normals
    uniform.normalsOffset = normals->GetOffset() / normals->GetComponentSize();
    uniform.normalsStride = normals->GetStride() / normals->GetComponentSize();

    // The number of points is based off the size of the output,
    // However, the number of points in the adjacency table
    // is computed based off the largest vertex indexed from
    // to topology (aka topology->ComputeNumPoints).
    //
    // Therefore, we need to clamp the number of points
    // to the number of entries in the adjancency table.
    int numDestPoints = range->GetNumElements();
    int numSrcPoints = _adjacency->GetNumPoints();

    int numPoints = std::min(numSrcPoints, numDestPoints);

    // transfer uniform buffer
    GLuint ubo = computeProgram->GetGlobalUniformBuffer().GetId();
    HdRenderContextCaps const &caps = HdRenderContextCaps::GetInstance();
    // XXX: workaround for 319.xx driver bug of glNamedBufferDataEXT on UBO
    // XXX: move this workaround to renderContextCaps
    if (false && caps.directStateAccessEnabled) {
        glNamedBufferDataEXT(ubo, sizeof(uniform), &uniform, GL_STATIC_DRAW);
    } else {
        glBindBuffer(GL_UNIFORM_BUFFER, ubo);
        glBufferData(GL_UNIFORM_BUFFER, sizeof(uniform), &uniform, GL_STATIC_DRAW);
        glBindBuffer(GL_UNIFORM_BUFFER, 0);
    }

    glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, points->GetId());
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, normals->GetId());
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, adjacency->GetId());

    // dispatch compute kernel
    glUseProgram(program);

    glDispatchCompute(numPoints, 1, 1);

    glUseProgram(0);
    glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);

    glBindBufferBase(GL_UNIFORM_BUFFER, 0, 0);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, 0);
}
Exemplo n.º 4
0
void
Hd_SmoothNormalsComputationGPU::Execute(
    HdBufferArrayRangeSharedPtr const &range)
{
    HD_TRACE_FUNCTION();
    HD_MALLOC_TAG_FUNCTION();

    if (not glDispatchCompute)
        return;

    // XXX: workaround until the shading stuff is implemeted.
    // The drawing program is owned and set by testHdBasicDrawing now,
    // so it has to be restored if it's changed in hd.
    GLint restoreProgram = 0;
    glGetIntegerv(GL_CURRENT_PROGRAM, &restoreProgram);

    int numPoints = range->GetNumElements();

    TF_VERIFY(_adjacency);
    HdBufferArrayRangeSharedPtr const &adjacencyRange = _adjacency->GetAdjacencyRange();
    TF_VERIFY(adjacencyRange);

    // select shader by datatype
    TfToken shaderToken = (_dstDataType == GL_FLOAT ?
                           HdGLSLProgramTokens->smoothNormalsFloat :
                           HdGLSLProgramTokens->smoothNormalsDouble);

    HdGLSLProgramSharedPtr computeProgram = HdGLSLProgram::GetComputeProgram(shaderToken);
    if (not computeProgram) return;

    GLuint program = computeProgram->GetProgram().GetId();

    // buffer resources for GPU computation
    HdBufferResourceSharedPtr points = range->GetResource(_srcName);
    HdBufferResourceSharedPtr normals = range->GetResource(_dstName);
    HdBufferResourceSharedPtr adjacency = adjacencyRange->GetResource();

    // prepare uniform buffer for GPU computation
    struct Uniform {
        int vertexOffset;
        int adjacencyStride;
        int adjacencyOffset;
        int padding;
        int pointsOffset;
        int pointsStride;
        int normalsOffset;
        int normalsStride;
    } uniform;

    // coherent vertex offset in aggregated buffer array
    uniform.vertexOffset = range->GetOffset();
    // adjacency offset/stride in aggregated adjacency table
    uniform.adjacencyStride = _adjacency->GetStride();
    uniform.adjacencyOffset = adjacencyRange->GetOffset();
    uniform.padding = 0;
    // interleaved offset/stride to points
    // note: this code (and the glsl smooth normal compute shader) assumes
    // components in interleaved vertex array are always same data type.
    // i.e. it can't handle an interleaved array which interleaves
    // float/double, float/int etc.
    uniform.pointsOffset = points->GetOffset() / points->GetComponentSize();
    uniform.pointsStride = points->GetStride() / points->GetComponentSize();
    // interleaved offset/stride to normals
    uniform.normalsOffset = normals->GetOffset() / points->GetComponentSize();
    uniform.normalsStride = normals->GetStride() / points->GetComponentSize();

    // transfer uniform buffer
    GLuint ubo = computeProgram->GetGlobalUniformBuffer().GetId();
    HdRenderContextCaps const &caps = HdRenderContextCaps::GetInstance();
    // XXX: workaround for 319.xx driver bug of glNamedBufferDataEXT on UBO
    // XXX: move this workaround to renderContextCaps
    if (false and caps.directStateAccessEnabled) {
        glNamedBufferDataEXT(ubo, sizeof(uniform), &uniform, GL_STATIC_DRAW);
    } else {
        glBindBuffer(GL_UNIFORM_BUFFER, ubo);
        glBufferData(GL_UNIFORM_BUFFER, sizeof(uniform), &uniform, GL_STATIC_DRAW);
        glBindBuffer(GL_UNIFORM_BUFFER, 0);
    }

    glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, points->GetId());
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, normals->GetId());
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, adjacency->GetId());

    // dispatch compute kernel
    glUseProgram(program);

    glDispatchCompute(numPoints, 1, 1);

    glUseProgram(0);

    glBindBufferBase(GL_UNIFORM_BUFFER, 0, 0);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, 0);

    // XXX: workaround until shading stuff implemeted.
    glUseProgram(restoreProgram);
}