//--------------------------------------------------------------------------------------
// Encode the source texture to BC7 and store the result in a buffer
// The source texture can only have 1 sub resource, i.e. it must be a single 2D texture which has only 1 mip level
// The job of breaking down texture arrays, or texture with multiple mip levels is taken care of in the base class
//
// Parameters:
//   pDevice              - device used to create the views and buffers
//   pContext             - context used to update the constant buffer and run the compute shaders
//   pSrcTexture          - source texture to encode
//   dstFormat            - must be DXGI_FORMAT_BC7_UNORM or DXGI_FORMAT_BC7_UNORM_SRGB
//   ppDstTextureAsBufOut - receives the structured buffer the encode pass writes to;
//                          it is set to nullptr whenever the function fails
//
// Returns E_INVALIDARG for a null argument or an unsupported dstFormat, the failing HRESULT
// of a resource creation or constant buffer Map call, otherwise the last HRESULT stored in hr.
//
// NOTE(review): V_GOTO is defined elsewhere; this code assumes it stores the result of its
// argument in the local 'hr' and jumps to the 'quit' label on failure.
//--------------------------------------------------------------------------------------
HRESULT CGPUBC7Encoder::GPU_Encode( ID3D11Device* pDevice, ID3D11DeviceContext* pContext,
                                    ID3D11Texture2D* pSrcTexture, 
                                    DXGI_FORMAT dstFormat, ID3D11Buffer** ppDstTextureAsBufOut )
{
    ID3D11ShaderResourceView* pSRV = nullptr;
    ID3D11Buffer* pDstBuffer = nullptr;
    ID3D11Buffer* pErrBestModeBuffer[2] = { nullptr, nullptr };
    ID3D11UnorderedAccessView* pUAV = nullptr;
    ID3D11UnorderedAccessView* pErrBestModeUAV[2] = { nullptr, nullptr };
    ID3D11ShaderResourceView* pErrBestModeSRV[2] = { nullptr, nullptr };
    ID3D11Buffer* pCBCS = nullptr;

    if ( !ppDstTextureAsBufOut )
    {
        return E_INVALIDARG;
    }

    // Never hand back a stale or half-initialized pointer on a failure path
    *ppDstTextureAsBufOut = nullptr;

    if ( !pDevice || !pContext || !pSrcTexture ||
         !(dstFormat == DXGI_FORMAT_BC7_UNORM || dstFormat == DXGI_FORMAT_BC7_UNORM_SRGB) )
    {
        return E_INVALIDARG;
    }

    D3D11_TEXTURE2D_DESC texSrcDesc;
    pSrcTexture->GetDesc( &texSrcDesc );

    // Every function-scope local is declared before the first V_GOTO so that no
    // 'goto quit' jumps over an initialization
    D3D11_BUFFER_DESC sbOutDesc = {};
    HRESULT hr = S_OK;

    // Create a SRV for input texture        
    {
        D3D11_SHADER_RESOURCE_VIEW_DESC SRVDesc = {};
        SRVDesc.Texture2D.MipLevels = texSrcDesc.MipLevels;
        SRVDesc.Texture2D.MostDetailedMip = 0;
        SRVDesc.Format = texSrcDesc.Format;
        SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
        V_GOTO( pDevice->CreateShaderResourceView( pSrcTexture, &SRVDesc, &pSRV ) );

#if defined(_DEBUG) || defined(PROFILE)
        if ( pSRV )
        {
            pSRV->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7 SRV" ) - 1, "BC7 SRV" );
        }
#endif
    }

    // Create output buffer and the two error/best-mode buffers (same layout as the output)
    {
        sbOutDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
        sbOutDesc.CPUAccessFlags = 0;
        sbOutDesc.Usage = D3D11_USAGE_DEFAULT;
        sbOutDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
        sbOutDesc.StructureByteStride = sizeof( BufferBC6HBC7 );
        sbOutDesc.ByteWidth = texSrcDesc.Height * texSrcDesc.Width * sizeof( BufferBC6HBC7 ) / BLOCK_SIZE;
        //+ texSrcDesc.Height * texSrcDesc.Width * sizeof( BufferBC7 ) * 5;//For dump
        V_GOTO( pDevice->CreateBuffer(&sbOutDesc, nullptr, &pDstBuffer) );
        V_GOTO( pDevice->CreateBuffer(&sbOutDesc, nullptr, &pErrBestModeBuffer[0]) );
        V_GOTO( pDevice->CreateBuffer(&sbOutDesc, nullptr, &pErrBestModeBuffer[1]) );

        _Analysis_assume_( pErrBestModeBuffer[0] != 0 );

#if defined(_DEBUG) || defined(PROFILE)
        if ( pDstBuffer )
        {
            pDstBuffer->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7 Dest" ) - 1, "BC7 Dest" );
        }
        if ( pErrBestModeBuffer[0] )
        {
            pErrBestModeBuffer[0]->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7 ErrBest0" ) - 1, "BC7 ErrBest0" );
        }
        if ( pErrBestModeBuffer[1] )
        {
            pErrBestModeBuffer[1]->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7 ErrBest1" ) - 1, "BC7 ErrBest1" );
        }
#endif
    }

    // Create UAVs of the output buffer and the error/best-mode buffers
    {
        D3D11_UNORDERED_ACCESS_VIEW_DESC UAVDesc = {};
        UAVDesc.Buffer.FirstElement = 0;
        UAVDesc.Buffer.NumElements = sbOutDesc.ByteWidth / sbOutDesc.StructureByteStride;
        UAVDesc.Format = DXGI_FORMAT_UNKNOWN;
        UAVDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
#pragma warning (push)
#pragma warning (disable:6387)
        V_GOTO( pDevice->CreateUnorderedAccessView( pDstBuffer, &UAVDesc, &pUAV ) );
        V_GOTO( pDevice->CreateUnorderedAccessView( pErrBestModeBuffer[0], &UAVDesc, &pErrBestModeUAV[0] ) );
        V_GOTO( pDevice->CreateUnorderedAccessView( pErrBestModeBuffer[1], &UAVDesc, &pErrBestModeUAV[1] ) );
#pragma warning (pop)

#if defined(_DEBUG) || defined(PROFILE)
        if ( pUAV )
        {
            pUAV->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7 Dest UAV" ) - 1, "BC7 Dest UAV" );
        }
        if ( pErrBestModeUAV[0] )
        {
            pErrBestModeUAV[0]->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7 ErrBest0 UAV" ) - 1, "BC7 ErrBest0 UAV" );
        }
        if ( pErrBestModeUAV[1] )
        {
            pErrBestModeUAV[1]->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7 ErrBest1 UAV" ) - 1, "BC7 ErrBest1 UAV" );
        }
#endif
    }
    
    // Create SRVs of the error/best-mode buffers so one pass can read what the previous pass wrote
    {
        D3D11_SHADER_RESOURCE_VIEW_DESC SRVDesc = {};
        SRVDesc.Buffer.FirstElement = 0;
        SRVDesc.Buffer.NumElements = texSrcDesc.Height * texSrcDesc.Width / BLOCK_SIZE;
        SRVDesc.Format = DXGI_FORMAT_UNKNOWN;
        SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
#pragma warning (push)
#pragma warning (disable:6387)
        V_GOTO( pDevice->CreateShaderResourceView( pErrBestModeBuffer[0], &SRVDesc, &pErrBestModeSRV[0]) );
        V_GOTO( pDevice->CreateShaderResourceView( pErrBestModeBuffer[1], &SRVDesc, &pErrBestModeSRV[1]) );
#pragma warning (pop)

#if defined(_DEBUG) || defined(PROFILE)
        if ( pErrBestModeSRV[0] )
        {
            pErrBestModeSRV[0]->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7 ErrBest0 SRV" ) - 1, "BC7 ErrBest0 SRV" );
        }
        if ( pErrBestModeSRV[1] )
        {
            pErrBestModeSRV[1]->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7 ErrBest1 SRV" ) - 1, "BC7 ErrBest1 SRV" );
        }
#endif
    }

    // Create constant buffer    
    {
        D3D11_BUFFER_DESC cbDesc = {};  // zero-init so StructureByteStride is not left indeterminate
        cbDesc.Usage = D3D11_USAGE_DYNAMIC;
        cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
        cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
        cbDesc.MiscFlags = 0;
        cbDesc.ByteWidth = sizeof( UINT ) * 8;
        V_GOTO( pDevice->CreateBuffer( &cbDesc, nullptr, &pCBCS ) );

#if defined(_DEBUG) || defined(PROFILE)
        if ( pCBCS )
        {
            pCBCS->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7Encode" ) - 1, "BC7Encode" );
        }
#endif
    }

    // Run the encoder in batches of blocks. The nested scope keeps the locals below out of
    // scope at 'quit', so the earlier V_GOTOs do not bypass their initialization.
    {
        int const MAX_BLOCK_BATCH = 64;
        const UINT modes137[] = { 1, 3, 7 };
        const UINT modes02[] = { 0, 2 };
        const float fAlphaWeight = m_fAlphaWeight;

        // NOTE(review): evaluated left to right as ((W / BX) * H) / BY, which only equals
        // (W / BX) * (H / BY) when the height is a multiple of BLOCK_SIZE_Y.
        int num_total_blocks = texSrcDesc.Width / BLOCK_SIZE_X * texSrcDesc.Height / BLOCK_SIZE_Y;
        int num_blocks = num_total_blocks;
        int start_block_id = 0;

        // Writes the 8-UINT parameter block for the given mode into the constant buffer.
        // Returns the Map failure code instead of copying through an invalid pointer.
        auto UpdateConstants = [&]( UINT modeId ) -> HRESULT
        {
            D3D11_MAPPED_SUBRESOURCE cbMapped;
            const HRESULT hrMap = pContext->Map( pCBCS, 0, D3D11_MAP_WRITE_DISCARD, 0, &cbMapped );
            if ( FAILED( hrMap ) )
            {
                return hrMap;
            }

            UINT param[8] = {};     // param[7] is unused padding; keep it deterministic
            param[0] = texSrcDesc.Width;
            param[1] = texSrcDesc.Width / BLOCK_SIZE_X;
            param[2] = dstFormat;
            param[3] = modeId;
            param[4] = start_block_id;
            param[5] = num_total_blocks;
            static_assert( sizeof( float ) == sizeof( UINT ), "alpha weight must fit in one constant slot" );
            memcpy( &param[6], &fAlphaWeight, sizeof( fAlphaWeight ) );   // bit copy, no aliasing cast
            memcpy( cbMapped.pData, param, sizeof( param ) );
            pContext->Unmap( pCBCS, 0 );
            return S_OK;
        };

        while (num_blocks > 0)
        {
            int n = min(num_blocks, MAX_BLOCK_BATCH);
            UINT uThreadGroupCount = n;

            V_GOTO( UpdateConstants( 0 ) );

            // First pass writes into error buffer 0
            ID3D11ShaderResourceView* pSRVs[] = { pSRV, nullptr };
            RunComputeShader( pContext, m_pTryMode456CS, pSRVs, 2, pCBCS, pErrBestModeUAV[0], __max(uThreadGroupCount / 4, 1), 1, 1 );        

            // Each following pass reads the buffer the previous pass wrote and writes the other one
            for (int i = 0; i < 3; ++ i)
            {
                V_GOTO( UpdateConstants( modes137[i] ) );

                pSRVs[1] = pErrBestModeSRV[i & 1];
                RunComputeShader( pContext, m_pTryMode137CS, pSRVs, 2, pCBCS,  pErrBestModeUAV[!(i & 1)], uThreadGroupCount, 1, 1 );
            }               

            for (int i = 0; i < 2; ++ i)
            {
                V_GOTO( UpdateConstants( modes02[i] ) );

                pSRVs[1] = pErrBestModeSRV[!(i & 1)];
                RunComputeShader( pContext, m_pTryMode02CS, pSRVs, 2, pCBCS,  pErrBestModeUAV[i & 1], uThreadGroupCount, 1, 1 );
            }

            // The last try-mode pass wrote error buffer 1; the encode pass reads it and writes the output
            pSRVs[1] = pErrBestModeSRV[1];
            RunComputeShader( pContext, m_pEncodeBlockCS, pSRVs, 2, pCBCS,  pUAV, __max(uThreadGroupCount / 4, 1), 1, 1 );        

            start_block_id += n;
            num_blocks -= n;
        }
    }

quit:
    SAFE_RELEASE(pSRV);
    SAFE_RELEASE(pUAV);
    SAFE_RELEASE(pErrBestModeSRV[0]);
    SAFE_RELEASE(pErrBestModeSRV[1]);
    SAFE_RELEASE(pErrBestModeUAV[0]);
    SAFE_RELEASE(pErrBestModeUAV[1]);
    SAFE_RELEASE(pErrBestModeBuffer[0]);
    SAFE_RELEASE(pErrBestModeBuffer[1]);
    SAFE_RELEASE(pCBCS);

    // Ownership of the output buffer is transferred to the caller only on success
    if ( FAILED( hr ) )
    {
        SAFE_RELEASE(pDstBuffer);
        pDstBuffer = nullptr;
    }
    *ppDstTextureAsBufOut = pDstBuffer;

    return hr;
}
// Example #2
// 0
//-------------------------------------------------------------------------------------
// Compresses srcImage into destImage (BC6H or BC7, selected by m_bcformat) by running
// the DirectCompute shaders in batches of blocks, then copies the result back to the CPU.
//
// Returns:
//   E_INVALIDARG  - either image has no pixel pointer
//   E_UNEXPECTED  - image size/format does not match m_width / m_height / m_srcformat /
//                   m_bcformat, m_bcformat is not a BC6H/BC7 format, or there is no context
//   E_POINTER     - there is no device
//   otherwise the HRESULT of the failing D3D11 call, or of the final read-back Map.
//-------------------------------------------------------------------------------------
_Use_decl_annotations_
HRESULT GPUCompressBC::Compress( const Image& srcImage, const Image& destImage )
{
    if ( !srcImage.pixels || !destImage.pixels )
        return E_INVALIDARG;

    if ( srcImage.width != destImage.width
         || srcImage.height != destImage.height
         || srcImage.width != m_width
         || srcImage.height != m_height
         || srcImage.format != m_srcformat
         || destImage.format != m_bcformat )
    {
        return E_UNEXPECTED;
    }

    //--- Create input texture --------------------------------------------------------
    auto pDevice = m_device.Get();
    if ( !pDevice )
        return E_POINTER;

    // We need to avoid the hardware doing additional colorspace conversion
    DXGI_FORMAT inputFormat = ( m_srcformat == DXGI_FORMAT_R8G8B8A8_UNORM_SRGB ) ? DXGI_FORMAT_R8G8B8A8_UNORM : m_srcformat;

    ScopedObject<ID3D11Texture2D> sourceTex;
    {
        D3D11_TEXTURE2D_DESC desc;
        memset( &desc, 0, sizeof(desc) );
        desc.Width = static_cast<UINT>( srcImage.width );
        desc.Height = static_cast<UINT>( srcImage.height ); 
        desc.MipLevels = 1;
        desc.ArraySize = 1;
        desc.Format = inputFormat;
        desc.SampleDesc.Count = 1;
        desc.Usage = D3D11_USAGE_DEFAULT;
        desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;

        D3D11_SUBRESOURCE_DATA initData;
        initData.pSysMem = srcImage.pixels;
        initData.SysMemPitch = static_cast<DWORD>( srcImage.rowPitch );
        initData.SysMemSlicePitch = static_cast<DWORD>( srcImage.slicePitch );

        HRESULT hr = pDevice->CreateTexture2D( &desc, &initData, sourceTex.GetAddressOf() );
        if ( FAILED(hr) )
        {
            return hr;
        }
    }

    ScopedObject<ID3D11ShaderResourceView> sourceSRV;
    {
        D3D11_SHADER_RESOURCE_VIEW_DESC desc;
        memset( &desc, 0, sizeof(desc) );
        desc.Texture2D.MipLevels = 1;
        desc.Format = inputFormat;
        desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;

        HRESULT hr = pDevice->CreateShaderResourceView( sourceTex.Get(), &desc, sourceSRV.ReleaseAndGetAddressOf() );
        if ( FAILED(hr) )
        {
            return hr;
        }
    }

    //--- Compress using DirectCompute ------------------------------------------------
    bool isbc7 = false;
    switch( m_bcformat )
    {
    case DXGI_FORMAT_BC6H_TYPELESS:
    case DXGI_FORMAT_BC6H_UF16:
    case DXGI_FORMAT_BC6H_SF16:
        break;

    case DXGI_FORMAT_BC7_TYPELESS:
    case DXGI_FORMAT_BC7_UNORM:
    case DXGI_FORMAT_BC7_UNORM_SRGB:
        isbc7 = true;
        break;

    default:
        return E_UNEXPECTED;
    }

    const UINT MAX_BLOCK_BATCH = 64;

    auto pContext = m_context.Get();
    if ( !pContext )
        return E_UNEXPECTED;

    // Number of 4x4 blocks in each direction, rounded up, at least one
    size_t xblocks = std::max<size_t>( 1, (m_width + 3) >> 2 );
    size_t yblocks = std::max<size_t>( 1, (m_height + 3) >> 2 );

    UINT num_total_blocks = static_cast<UINT>( xblocks * yblocks );
    UINT num_blocks = num_total_blocks;
    UINT start_block_id = 0;

    // Uploads the shader constants for the given mode. On a Map failure the context is
    // reset (every failure path inside the batch loop does this) and the error is returned.
    auto uploadConstants = [&]( UINT modeId ) -> HRESULT
    {
        D3D11_MAPPED_SUBRESOURCE mapped;
        const HRESULT hrMap = pContext->Map( m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
        if ( FAILED(hrMap) )
        {
            ResetContext( pContext );
            return hrMap;
        }

        // Value-initialized so no indeterminate bytes reach the GPU
        ConstantsBC6HBC7 param = {};
        param.tex_width = static_cast<UINT>( srcImage.width );
        param.num_block_x = static_cast<UINT>( xblocks );
        param.format = m_bcformat;
        param.mode_id = modeId;
        param.start_block_id = start_block_id;
        param.num_total_blocks = num_total_blocks;
        param.alpha_weight = m_alphaWeight;
        memcpy( mapped.pData, &param, sizeof( param ) );

        pContext->Unmap( m_constBuffer.Get(), 0 );
        return S_OK;
    };

    while (num_blocks > 0)
    {
        UINT n = std::min<UINT>( num_blocks, MAX_BLOCK_BATCH );
        UINT uThreadGroupCount = n;

        {
            HRESULT hr = uploadConstants( 0 );
            if ( FAILED(hr) )
                return hr;
        }

        if ( isbc7 )
        {
            //--- BC7 -----------------------------------------------------------------
            ID3D11ShaderResourceView* pSRVs[] = { sourceSRV.Get(), nullptr };
            RunComputeShader( pContext, m_BC7_tryMode456CS.Get(), pSRVs, 2, m_constBuffer.Get(),
                              m_err1UAV.Get(), std::max<UINT>(uThreadGroupCount / 4, 1) );

            // Each pass reads the error buffer written by the previous pass and writes the other one
            for ( UINT i = 0; i < 3; ++i )
            {
                static const UINT modes[] = { 1, 3, 7 };

                HRESULT hr = uploadConstants( modes[i] );
                if ( FAILED(hr) )
                    return hr;

                pSRVs[1] = (i & 1) ? m_err2SRV.Get() : m_err1SRV.Get();
                RunComputeShader( pContext, m_BC7_tryMode137CS.Get(), pSRVs, 2, m_constBuffer.Get(),
                                  (i & 1) ? m_err1UAV.Get() : m_err2UAV.Get(), uThreadGroupCount );
            }               

            for ( UINT i = 0; i < 2; ++i )
            {
                static const UINT modes[] = { 0, 2 };

                HRESULT hr = uploadConstants( modes[i] );
                if ( FAILED(hr) )
                    return hr;

                pSRVs[1] = (i & 1) ? m_err1SRV.Get() : m_err2SRV.Get();
                RunComputeShader( pContext, m_BC7_tryMode02CS.Get(), pSRVs, 2, m_constBuffer.Get(),
                                  (i & 1) ? m_err2UAV.Get() : m_err1UAV.Get(), uThreadGroupCount );
            }

            pSRVs[1] = m_err2SRV.Get();
            RunComputeShader( pContext, m_BC7_encodeBlockCS.Get(), pSRVs, 2, m_constBuffer.Get(),
                              m_outputUAV.Get(), std::max<UINT>(uThreadGroupCount / 4, 1) );
        }
        else
        {
            //--- BC6H ----------------------------------------------------------------
            ID3D11ShaderResourceView* pSRVs[] = { sourceSRV.Get(), nullptr };
            RunComputeShader( pContext, m_BC6H_tryModeG10CS.Get(), pSRVs, 2, m_constBuffer.Get(),
                              m_err1UAV.Get(), std::max<UINT>(uThreadGroupCount / 4, 1) );

            for ( UINT i = 0; i < 10; ++i )
            {
                HRESULT hr = uploadConstants( i );
                if ( FAILED(hr) )
                    return hr;

                pSRVs[1] = (i & 1) ? m_err2SRV.Get() : m_err1SRV.Get();
                RunComputeShader( pContext, m_BC6H_tryModeLE10CS.Get(), pSRVs, 2, m_constBuffer.Get(),
                                  (i & 1) ? m_err1UAV.Get() : m_err2UAV.Get(), std::max<UINT>(uThreadGroupCount / 2, 1) );
            }               

            pSRVs[1] = m_err1SRV.Get();
            RunComputeShader( pContext, m_BC6H_encodeBlockCS.Get(), pSRVs, 2, m_constBuffer.Get(),
                              m_outputUAV.Get(), std::max<UINT>(uThreadGroupCount / 2, 1) );
        }

        start_block_id += n;
        num_blocks -= n;
    }

    ResetContext( pContext );

    //--- Copy output texture back to CPU ---------------------------------------------

    pContext->CopyResource( m_outputCPU.Get(), m_output.Get() );

    D3D11_MAPPED_SUBRESOURCE mapped;
    HRESULT hr = pContext->Map( m_outputCPU.Get(), 0, D3D11_MAP_READ, 0, &mapped );
    if ( SUCCEEDED(hr) )
    {
        const uint8_t *pSrc = reinterpret_cast<const uint8_t *>( mapped.pData );
        uint8_t *pDest = destImage.pixels;

        // Bytes per row of blocks in the GPU output
        const size_t pitch = xblocks * sizeof( BufferBC6HBC7 );

        const size_t rows = std::max<size_t>( 1, ( destImage.height + 3 ) >> 2 );

        // Never read more than one source row per destination row; otherwise a
        // destination pitch larger than the source pitch would over-read the mapped buffer
        const size_t copyBytes = std::min<size_t>( destImage.rowPitch, pitch );

        for( size_t h = 0; h < rows; ++h )
        {
            memcpy( pDest, pSrc, copyBytes );

            pSrc += pitch;
            pDest += destImage.rowPitch;
        }

        pContext->Unmap( m_outputCPU.Get(), 0 );
    }

    return hr;
}