//-------------------------------------------------------------------------------------- // Encode the source texture to BC7 and store the result in a buffer // The source texture can only have 1 sub resource, i.e. it must be a signle 2D texture which has only 1 mip level // The job of breaking down texture arrays, or texture with multiple mip levels is taken care of in the base class //-------------------------------------------------------------------------------------- HRESULT CGPUBC7Encoder::GPU_Encode( ID3D11Device* pDevice, ID3D11DeviceContext* pContext, ID3D11Texture2D* pSrcTexture, DXGI_FORMAT dstFormat, ID3D11Buffer** ppDstTextureAsBufOut ) { ID3D11ShaderResourceView* pSRV = nullptr; ID3D11Buffer* pErrBestModeBuffer[2] = { nullptr, nullptr }; ID3D11UnorderedAccessView* pUAV = nullptr; ID3D11UnorderedAccessView* pErrBestModeUAV[2] = { nullptr, nullptr }; ID3D11ShaderResourceView* pErrBestModeSRV[2] = { nullptr, nullptr }; ID3D11Buffer* pCBCS = nullptr; if ( !(dstFormat == DXGI_FORMAT_BC7_UNORM || dstFormat == DXGI_FORMAT_BC7_UNORM_SRGB) || !ppDstTextureAsBufOut ) { return E_INVALIDARG; } D3D11_TEXTURE2D_DESC texSrcDesc; pSrcTexture->GetDesc( &texSrcDesc ); HRESULT hr = S_OK; // Create a SRV for input texture { D3D11_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; SRVDesc.Texture2D.MipLevels = texSrcDesc.MipLevels; SRVDesc.Texture2D.MostDetailedMip = 0; SRVDesc.Format = texSrcDesc.Format; SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; V_GOTO( pDevice->CreateShaderResourceView( pSrcTexture, &SRVDesc, &pSRV ) ); #if defined(_DEBUG) || defined(PROFILE) if ( pSRV ) { pSRV->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7 SRV" ) - 1, "BC7 SRV" ); } #endif } // Create output buffer D3D11_BUFFER_DESC sbOutDesc; { sbOutDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; sbOutDesc.CPUAccessFlags = 0; sbOutDesc.Usage = D3D11_USAGE_DEFAULT; sbOutDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; sbOutDesc.StructureByteStride = sizeof( BufferBC6HBC7 ); sbOutDesc.ByteWidth = texSrcDesc.Height * texSrcDesc.Width * sizeof( BufferBC6HBC7 ) / BLOCK_SIZE; //+ texSrcDesc.Height * texSrcDesc.Width * sizeof( BufferBC7 ) * 5;//For dump V_GOTO( pDevice->CreateBuffer(&sbOutDesc, nullptr, ppDstTextureAsBufOut) ); V_GOTO( pDevice->CreateBuffer(&sbOutDesc, nullptr, &pErrBestModeBuffer[0]) ); V_GOTO( pDevice->CreateBuffer(&sbOutDesc, nullptr, &pErrBestModeBuffer[1]) ); _Analysis_assume_( pErrBestModeBuffer[0] != 0 ); #if defined(_DEBUG) || defined(PROFILE) if ( *ppDstTextureAsBufOut ) { (*ppDstTextureAsBufOut)->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7 Dest" ) - 1, "BC7 Dest" ); } if ( pErrBestModeBuffer[0] ) { pErrBestModeBuffer[0]->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7 ErrBest0" ) - 1, "BC7 ErrBest0" ); } if ( pErrBestModeBuffer[1] ) { pErrBestModeBuffer[1]->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7 ErrBest1" ) - 1, "BC7 ErrBest1" ); } #endif } // Create UAV of the output texture { D3D11_UNORDERED_ACCESS_VIEW_DESC UAVDesc = {}; UAVDesc.Buffer.FirstElement = 0; UAVDesc.Buffer.NumElements = sbOutDesc.ByteWidth / sbOutDesc.StructureByteStride; UAVDesc.Format = DXGI_FORMAT_UNKNOWN; UAVDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; #pragma warning (push) #pragma warning (disable:6387) V_GOTO( pDevice->CreateUnorderedAccessView( *ppDstTextureAsBufOut, &UAVDesc, &pUAV ) ); V_GOTO( pDevice->CreateUnorderedAccessView( pErrBestModeBuffer[0], &UAVDesc, &pErrBestModeUAV[0] ) ); V_GOTO( pDevice->CreateUnorderedAccessView( pErrBestModeBuffer[1], &UAVDesc, &pErrBestModeUAV[1] ) ); #pragma warning (pop) #if defined(_DEBUG) || defined(PROFILE) if ( pUAV ) { pUAV->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7 Dest UAV" ) - 1, "BC7 Dest UAV" ); } if ( pErrBestModeUAV[0] ) { pErrBestModeUAV[0]->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7 ErrBest0 UAV" ) - 1, "BC7 ErrBest0 UAV" ); } if ( pErrBestModeUAV[1] ) { pErrBestModeUAV[1]->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7 ErrBest1 UAV" ) - 1, "BC7 ErrBest1 UAV" ); } #endif } { D3D11_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; SRVDesc.Buffer.FirstElement = 0; SRVDesc.Buffer.NumElements = texSrcDesc.Height * texSrcDesc.Width / BLOCK_SIZE; SRVDesc.Format = DXGI_FORMAT_UNKNOWN; SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; #pragma warning (push) #pragma warning (disable:6387) V_GOTO( pDevice->CreateShaderResourceView( pErrBestModeBuffer[0], &SRVDesc, &pErrBestModeSRV[0]) ); V_GOTO( pDevice->CreateShaderResourceView( pErrBestModeBuffer[1], &SRVDesc, &pErrBestModeSRV[1]) ); #pragma warning (pop) #if defined(_DEBUG) || defined(PROFILE) if ( pErrBestModeSRV[0] ) { pErrBestModeSRV[0]->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7 ErrBest0 SRV" ) - 1, "BC7 ErrBest0 SRV" ); } if ( pErrBestModeSRV[1] ) { pErrBestModeSRV[1]->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7 ErrBest1 SRV" ) - 1, "BC7 ErrBest1 SRV" ); } #endif } // Create constant buffer { D3D11_BUFFER_DESC cbDesc; cbDesc.Usage = D3D11_USAGE_DYNAMIC; cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; cbDesc.MiscFlags = 0; cbDesc.ByteWidth = sizeof( UINT ) * 8; V_GOTO( pDevice->CreateBuffer( &cbDesc, nullptr, &pCBCS ) ); #if defined(_DEBUG) || defined(PROFILE) if ( pCBCS ) { pCBCS->SetPrivateData( WKPDID_D3DDebugObjectName, sizeof( "BC7Encode" ) - 1, "BC7Encode" ); } #endif } int const MAX_BLOCK_BATCH = 64; int num_total_blocks = texSrcDesc.Width / BLOCK_SIZE_X * texSrcDesc.Height / BLOCK_SIZE_Y; int num_blocks = num_total_blocks; int start_block_id = 0; while (num_blocks > 0) { int n = min(num_blocks, MAX_BLOCK_BATCH); UINT uThreadGroupCount = n; { D3D11_MAPPED_SUBRESOURCE cbMapped; pContext->Map( pCBCS, 0, D3D11_MAP_WRITE_DISCARD, 0, &cbMapped ); UINT param[8]; param[0] = texSrcDesc.Width; param[1] = texSrcDesc.Width / BLOCK_SIZE_X; param[2] = dstFormat; param[3] = 0; param[4] = start_block_id; param[5] = num_total_blocks; *((float*)¶m[6]) = m_fAlphaWeight; memcpy( cbMapped.pData, param, sizeof( param ) ); pContext->Unmap( pCBCS, 0 ); } ID3D11ShaderResourceView* pSRVs[] = { pSRV, nullptr }; RunComputeShader( pContext, m_pTryMode456CS, pSRVs, 2, pCBCS, pErrBestModeUAV[0], __max(uThreadGroupCount / 4, 1), 1, 1 ); for (int i = 0; i < 3; ++ i) { int modes[] = { 1, 3, 7 }; { D3D11_MAPPED_SUBRESOURCE cbMapped; pContext->Map( pCBCS, 0, D3D11_MAP_WRITE_DISCARD, 0, &cbMapped ); UINT param[8]; param[0] = texSrcDesc.Width; param[1] = texSrcDesc.Width / BLOCK_SIZE_X; param[2] = dstFormat; param[3] = modes[i]; param[4] = start_block_id; param[5] = num_total_blocks; *((float*)¶m[6]) = m_fAlphaWeight; memcpy( cbMapped.pData, param, sizeof( param ) ); pContext->Unmap( pCBCS, 0 ); } pSRVs[1] = pErrBestModeSRV[i & 1]; RunComputeShader( pContext, m_pTryMode137CS, pSRVs, 2, pCBCS, pErrBestModeUAV[!(i & 1)], uThreadGroupCount, 1, 1 ); } for (int i = 0; i < 2; ++ i) { int modes[] = { 0, 2 }; { D3D11_MAPPED_SUBRESOURCE cbMapped; pContext->Map( pCBCS, 0, D3D11_MAP_WRITE_DISCARD, 0, &cbMapped ); UINT param[8]; param[0] = texSrcDesc.Width; param[1] = texSrcDesc.Width / BLOCK_SIZE_X; param[2] = dstFormat; param[3] = modes[i]; param[4] = start_block_id; param[5] = num_total_blocks; *((float*)¶m[6]) = m_fAlphaWeight; memcpy( cbMapped.pData, param, sizeof( param ) ); pContext->Unmap( pCBCS, 0 ); } pSRVs[1] = pErrBestModeSRV[!(i & 1)]; RunComputeShader( pContext, m_pTryMode02CS, pSRVs, 2, pCBCS, pErrBestModeUAV[i & 1], uThreadGroupCount, 1, 1 ); } pSRVs[1] = pErrBestModeSRV[1]; RunComputeShader( pContext, m_pEncodeBlockCS, pSRVs, 2, pCBCS, pUAV, __max(uThreadGroupCount / 4, 1), 1, 1 ); start_block_id += n; num_blocks -= n; } quit: SAFE_RELEASE(pSRV); SAFE_RELEASE(pUAV); SAFE_RELEASE(pErrBestModeSRV[0]); SAFE_RELEASE(pErrBestModeSRV[1]); SAFE_RELEASE(pErrBestModeUAV[0]); SAFE_RELEASE(pErrBestModeUAV[1]); SAFE_RELEASE(pErrBestModeBuffer[0]); SAFE_RELEASE(pErrBestModeBuffer[1]); SAFE_RELEASE(pCBCS); return hr; }
//------------------------------------------------------------------------------------- _Use_decl_annotations_ HRESULT GPUCompressBC::Compress( const Image& srcImage, const Image& destImage ) { if ( !srcImage.pixels || !destImage.pixels ) return E_INVALIDARG; if ( srcImage.width != destImage.width || srcImage.height != destImage.height || srcImage.width != m_width || srcImage.height != m_height || srcImage.format != m_srcformat || destImage.format != m_bcformat ) { return E_UNEXPECTED; } //--- Create input texture -------------------------------------------------------- auto pDevice = m_device.Get(); if ( !pDevice ) return E_POINTER; // We need to avoid the hardware doing additional colorspace conversion DXGI_FORMAT inputFormat = ( m_srcformat == DXGI_FORMAT_R8G8B8A8_UNORM_SRGB ) ? DXGI_FORMAT_R8G8B8A8_UNORM : m_srcformat; ScopedObject<ID3D11Texture2D> sourceTex; { D3D11_TEXTURE2D_DESC desc; memset( &desc, 0, sizeof(desc) ); desc.Width = static_cast<UINT>( srcImage.width ); desc.Height = static_cast<UINT>( srcImage.height ); desc.MipLevels = 1; desc.ArraySize = 1; desc.Format = inputFormat; desc.SampleDesc.Count = 1; desc.Usage = D3D11_USAGE_DEFAULT; desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; D3D11_SUBRESOURCE_DATA initData; initData.pSysMem = srcImage.pixels; initData.SysMemPitch = static_cast<DWORD>( srcImage.rowPitch ); initData.SysMemSlicePitch = static_cast<DWORD>( srcImage.slicePitch ); HRESULT hr = pDevice->CreateTexture2D( &desc, &initData, sourceTex.GetAddressOf() ); if ( FAILED(hr) ) { return hr; } } ScopedObject<ID3D11ShaderResourceView> sourceSRV; { D3D11_SHADER_RESOURCE_VIEW_DESC desc; memset( &desc, 0, sizeof(desc) ); desc.Texture2D.MipLevels = 1; desc.Format = inputFormat; desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; HRESULT hr = pDevice->CreateShaderResourceView( sourceTex.Get(), &desc, sourceSRV.ReleaseAndGetAddressOf() ); if ( FAILED(hr) ) { return hr; } } //--- Compress using DirectCompute ------------------------------------------------ bool isbc7 = false; switch( m_bcformat ) { case DXGI_FORMAT_BC6H_TYPELESS: case DXGI_FORMAT_BC6H_UF16: case DXGI_FORMAT_BC6H_SF16: break; case DXGI_FORMAT_BC7_TYPELESS: case DXGI_FORMAT_BC7_UNORM: case DXGI_FORMAT_BC7_UNORM_SRGB: isbc7 = true; break; default: return E_UNEXPECTED; } const UINT MAX_BLOCK_BATCH = 64; auto pContext = m_context.Get(); if ( !pContext ) return E_UNEXPECTED; size_t xblocks = std::max<size_t>( 1, (m_width + 3) >> 2 ); size_t yblocks = std::max<size_t>( 1, (m_height + 3) >> 2 ); UINT num_total_blocks = static_cast<UINT>( xblocks * yblocks ); UINT num_blocks = num_total_blocks; int start_block_id = 0; while (num_blocks > 0) { UINT n = std::min<UINT>( num_blocks, MAX_BLOCK_BATCH ); UINT uThreadGroupCount = n; { D3D11_MAPPED_SUBRESOURCE mapped; HRESULT hr = pContext->Map( m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped ); if ( FAILED(hr) ) return hr; ConstantsBC6HBC7 param; param.tex_width = static_cast<UINT>( srcImage.width ); param.num_block_x = static_cast<UINT>( xblocks ); param.format = m_bcformat; param.mode_id = 0; param.start_block_id = start_block_id; param.num_total_blocks = num_total_blocks; param.alpha_weight = m_alphaWeight; memcpy( mapped.pData, ¶m, sizeof( param ) ); pContext->Unmap( m_constBuffer.Get(), 0 ); } if ( isbc7 ) { //--- BC7 ----------------------------------------------------------------- ID3D11ShaderResourceView* pSRVs[] = { sourceSRV.Get(), nullptr }; RunComputeShader( pContext, m_BC7_tryMode456CS.Get(), pSRVs, 2, m_constBuffer.Get(), m_err1UAV.Get(), std::max<UINT>(uThreadGroupCount / 4, 1) ); for ( UINT i = 0; i < 3; ++i ) { static const UINT modes[] = { 1, 3, 7 }; { D3D11_MAPPED_SUBRESOURCE mapped; HRESULT hr = pContext->Map( m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped ); if ( FAILED(hr) ) { ResetContext( pContext ); return hr; } ConstantsBC6HBC7 param; param.tex_width = static_cast<UINT>( srcImage.width ); param.num_block_x = static_cast<UINT>( xblocks ); param.format = m_bcformat; param.mode_id = modes[i]; param.start_block_id = start_block_id; param.num_total_blocks = num_total_blocks; param.alpha_weight = m_alphaWeight; memcpy( mapped.pData, ¶m, sizeof( param ) ); pContext->Unmap( m_constBuffer.Get(), 0 ); } pSRVs[1] = (i & 1) ? m_err2SRV.Get() : m_err1SRV.Get(); RunComputeShader( pContext, m_BC7_tryMode137CS.Get(), pSRVs, 2, m_constBuffer.Get(), (i & 1) ? m_err1UAV.Get() : m_err2UAV.Get(), uThreadGroupCount ); } for ( UINT i = 0; i < 2; ++i ) { static const UINT modes[] = { 0, 2 }; { D3D11_MAPPED_SUBRESOURCE mapped; HRESULT hr = pContext->Map( m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped ); if ( FAILED(hr) ) { ResetContext( pContext ); return hr; } ConstantsBC6HBC7 param; param.tex_width = static_cast<UINT>( srcImage.width ); param.num_block_x = static_cast<UINT>( xblocks ); param.format = m_bcformat; param.mode_id = modes[i]; param.start_block_id = start_block_id; param.num_total_blocks = num_total_blocks; param.alpha_weight = m_alphaWeight; memcpy( mapped.pData, ¶m, sizeof( param ) ); pContext->Unmap( m_constBuffer.Get(), 0 ); } pSRVs[1] = (i & 1) ? m_err1SRV.Get() : m_err2SRV.Get(); RunComputeShader( pContext, m_BC7_tryMode02CS.Get(), pSRVs, 2, m_constBuffer.Get(), (i & 1) ? m_err2UAV.Get() : m_err1UAV.Get(), uThreadGroupCount ); } pSRVs[1] = m_err2SRV.Get(); RunComputeShader( pContext, m_BC7_encodeBlockCS.Get(), pSRVs, 2, m_constBuffer.Get(), m_outputUAV.Get(), std::max<UINT>(uThreadGroupCount / 4, 1) ); } else { //--- BC6H ---------------------------------------------------------------- ID3D11ShaderResourceView* pSRVs[] = { sourceSRV.Get(), nullptr }; RunComputeShader( pContext, m_BC6H_tryModeG10CS.Get(), pSRVs, 2, m_constBuffer.Get(), m_err1UAV.Get(), std::max<UINT>(uThreadGroupCount / 4, 1) ); for ( UINT i = 0; i < 10; ++i ) { { D3D11_MAPPED_SUBRESOURCE mapped; HRESULT hr = pContext->Map( m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped ); if ( FAILED(hr) ) { ResetContext( pContext ); return hr; } ConstantsBC6HBC7 param; param.tex_width = static_cast<UINT>( srcImage.width ); param.num_block_x = static_cast<UINT>( xblocks ); param.format = m_bcformat; param.mode_id = i; param.start_block_id = start_block_id; param.num_total_blocks = num_total_blocks; memcpy( mapped.pData, ¶m, sizeof( param ) ); pContext->Unmap( m_constBuffer.Get(), 0 ); } pSRVs[1] = (i & 1) ? m_err2SRV.Get() : m_err1SRV.Get(); RunComputeShader( pContext, m_BC6H_tryModeLE10CS.Get(), pSRVs, 2, m_constBuffer.Get(), (i & 1) ? m_err1UAV.Get() : m_err2UAV.Get(), std::max<UINT>(uThreadGroupCount / 2, 1) ); } pSRVs[1] = m_err1SRV.Get(); RunComputeShader( pContext, m_BC6H_encodeBlockCS.Get(), pSRVs, 2, m_constBuffer.Get(), m_outputUAV.Get(), std::max<UINT>(uThreadGroupCount / 2, 1) ); } start_block_id += n; num_blocks -= n; } ResetContext( pContext ); //--- Copy output texture back to CPU --------------------------------------------- pContext->CopyResource( m_outputCPU.Get(), m_output.Get() ); D3D11_MAPPED_SUBRESOURCE mapped; HRESULT hr = pContext->Map( m_outputCPU.Get(), 0, D3D11_MAP_READ, 0, &mapped ); if ( SUCCEEDED(hr) ) { const uint8_t *pSrc = reinterpret_cast<const uint8_t *>( mapped.pData ); uint8_t *pDest = destImage.pixels; size_t pitch = xblocks * sizeof( BufferBC6HBC7 ); size_t rows = std::max<size_t>( 1, ( destImage.height + 3 ) >> 2 ); for( size_t h = 0; h < rows; ++h ) { memcpy( pDest, pSrc, destImage.rowPitch ); pSrc += pitch; pDest += destImage.rowPitch; } pContext->Unmap( m_outputCPU.Get(), 0 ); } return hr; }