void
OsdCudaKernelDispatcher::ApplyLoopVertexVerticesKernelA(FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
    OsdCudaComputeVertexA(_deviceVertices, _deviceVaryings,
                          _numVertexElements-3, _numVaryingElements,
                          (int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1],
                          (float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1],
                          offset, start, end, pass);
}
void
OsdCudaComputeController::ApplyLoopVertexVerticesKernelA2(
    FarKernelBatch const &batch, void * clientdata) const {

    OsdCudaComputeContext * context =
        static_cast<OsdCudaComputeContext*>(clientdata);
    assert(context);

    const OsdCudaTable * V_ITa = context->GetTable(FarSubdivisionTables<OsdVertex>::V_ITa);
    const OsdCudaTable * V_W = context->GetTable(FarSubdivisionTables<OsdVertex>::V_W);
    assert(V_ITa);
    assert(V_W);

    OsdCudaComputeVertexA(
        context->GetCurrentVertexBuffer(),
        context->GetCurrentVaryingBuffer(),
        context->GetVertexDescriptor().numVertexElements-3,
        context->GetVertexDescriptor().numVaryingElements,
        static_cast<int*>(V_ITa->GetCudaMemory()),
        static_cast<float*>(V_W->GetCudaMemory()),
        batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
}
void
OsdCudaComputeController::ApplyLoopVertexVerticesKernelA2(
    FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {

    assert(context);

    const OsdCudaTable * V_ITa = context->GetTable(FarSubdivisionTables::V_ITa);
    const OsdCudaTable * V_W = context->GetTable(FarSubdivisionTables::V_W);
    assert(V_ITa);
    assert(V_W);

    float *vertex = _currentBindState.GetOffsettedVertexBuffer();
    float *varying = _currentBindState.GetOffsettedVaryingBuffer();

    OsdCudaComputeVertexA(
        vertex, varying,
        _currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
        _currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
        static_cast<int*>(V_ITa->GetCudaMemory()),
        static_cast<float*>(V_W->GetCudaMemory()),
        batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
}
void
OsdCudaKernelDispatcher::ApplyLoopVertexVerticesKernelA(
    FarMesh<OsdVertex> * mesh, int offset, bool pass, int level,
    int start, int end, void * clientdata) const {

    OsdCudaComputeContext * context =
        static_cast<OsdCudaComputeContext*>(clientdata);
    assert(context);

    const OsdCudaTable * V_ITa = context->GetTable(Table::V_ITa);
    const OsdCudaTable * V_W = context->GetTable(Table::V_W);
    assert(V_ITa);
    assert(V_W);

    OsdCudaComputeVertexA(
        context->GetCurrentVertexBuffer(),
        context->GetCurrentVaryingBuffer(),
        context->GetCurrentVertexNumElements()-3,
        context->GetCurrentVaryingNumElements(),
        static_cast<int*>(V_ITa->GetCudaMemory()) + V_ITa->GetMarker(level-1),
        static_cast<float*>(V_W->GetCudaMemory()) + V_W->GetMarker(level-1),
        offset, start, end, pass);
}