//------------------------------------------------------------------------------------- static HRESULT _CompressBC( _In_ const Image& image, _In_ const Image& result, _In_ DWORD bcflags, _In_ float alphaRef, _In_ bool degenerate ) { if ( !image.pixels || !result.pixels ) return E_POINTER; assert( image.width == result.width ); assert( image.height == result.height ); const DXGI_FORMAT format = image.format; size_t sbpp = BitsPerPixel( format ); if ( !sbpp ) return E_FAIL; if ( sbpp < 8 ) { // We don't support compressing from monochrome (DXGI_FORMAT_R1_UNORM) return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); } // Round to bytes sbpp = ( sbpp + 7 ) / 8; uint8_t *pDest = result.pixels; // Determine BC format encoder BC_ENCODE pfEncode; size_t blocksize; switch(result.format) { case DXGI_FORMAT_BC1_UNORM: case DXGI_FORMAT_BC1_UNORM_SRGB: pfEncode = nullptr; blocksize = 8; break; case DXGI_FORMAT_BC2_UNORM: case DXGI_FORMAT_BC2_UNORM_SRGB: pfEncode = D3DXEncodeBC2; blocksize = 16; break; case DXGI_FORMAT_BC3_UNORM: case DXGI_FORMAT_BC3_UNORM_SRGB: pfEncode = D3DXEncodeBC3; blocksize = 16; break; case DXGI_FORMAT_BC4_UNORM: pfEncode = D3DXEncodeBC4U; blocksize = 8; break; case DXGI_FORMAT_BC4_SNORM: pfEncode = D3DXEncodeBC4S; blocksize = 8; break; case DXGI_FORMAT_BC5_UNORM: pfEncode = D3DXEncodeBC5U; blocksize = 16; break; case DXGI_FORMAT_BC5_SNORM: pfEncode = D3DXEncodeBC5S; blocksize = 16; break; case DXGI_FORMAT_BC6H_UF16: pfEncode = D3DXEncodeBC6HU; blocksize = 16; break; case DXGI_FORMAT_BC6H_SF16: pfEncode = D3DXEncodeBC6HS; blocksize = 16; break; case DXGI_FORMAT_BC7_UNORM: case DXGI_FORMAT_BC7_UNORM_SRGB: pfEncode = D3DXEncodeBC7; blocksize = 16; break; default: return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); } XMVECTOR temp[16]; const uint8_t *pSrc = image.pixels; const size_t rowPitch = image.rowPitch; for( size_t h=0; h < image.height; h += 4 ) { const uint8_t *sptr = pSrc; uint8_t* dptr = pDest; for( size_t count = 0; count < rowPitch; count += sbpp*4 ) 
{ if ( !_LoadScanline( &temp[0], 4, sptr, rowPitch, format ) ) return E_FAIL; if ( image.height > 1 ) { if ( !_LoadScanline( &temp[4], 4, sptr + rowPitch, rowPitch, format ) ) return E_FAIL; if ( image.height > 2 ) { if ( !_LoadScanline( &temp[8], 4, sptr + rowPitch*2, rowPitch, format ) ) return E_FAIL; if ( !_LoadScanline( &temp[12], 4, sptr + rowPitch*3, rowPitch, format ) ) return E_FAIL; } } if ( degenerate ) { assert( image.width < 4 || image.height < 4 ); const size_t uSrc[] = { 0, 0, 0, 1 }; if ( image.width < 4 ) { for( size_t t=0; t < image.height && t < 4; ++t ) { for( size_t s = image.width; s < 4; ++s ) { temp[ t*4 + s ] = temp[ t*4 + uSrc[s] ]; } } } if ( image.height < 4 ) { for( size_t t=image.height; t < 4; ++t ) { for( size_t s =0; s < 4; ++s ) { temp[ t*4 + s ] = temp[ uSrc[t]*4 + s ]; } } } } _ConvertScanline( temp, 16, result.format, format, 0 ); if ( pfEncode ) pfEncode( dptr, temp, bcflags ); else D3DXEncodeBC1( dptr, temp, alphaRef, bcflags ); sptr += sbpp*4; dptr += blocksize; } pSrc += rowPitch*4; pDest += result.rowPitch; } return S_OK; }
//------------------------------------------------------------------------------------- static HRESULT _DecompressBC( _In_ const Image& cImage, _In_ const Image& result ) { if ( !cImage.pixels || !result.pixels ) return E_POINTER; assert( cImage.width == result.width ); assert( cImage.height == result.height ); // Image must be a multiple of 4 (degenerate cases of 1x1, 1x2, 2x1, and 2x2 are allowed) size_t width = cImage.width; if ( (width % 4) != 0 ) { if ( width != 1 && width != 2 ) return E_INVALIDARG; } size_t height = cImage.height; if ( (height % 4) != 0 ) { if ( height != 1 && height != 2 ) return E_INVALIDARG; } const DXGI_FORMAT format = result.format; size_t dbpp = BitsPerPixel( format ); if ( !dbpp ) return E_FAIL; if ( dbpp < 8 ) { // We don't support decompressing to monochrome (DXGI_FORMAT_R1_UNORM) return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); } // Round to bytes dbpp = ( dbpp + 7 ) / 8; uint8_t *pDest = result.pixels; if ( !pDest ) return E_POINTER; // Promote "typeless" BC formats DXGI_FORMAT cformat; switch( cImage.format ) { case DXGI_FORMAT_BC1_TYPELESS: cformat = DXGI_FORMAT_BC1_UNORM; break; case DXGI_FORMAT_BC2_TYPELESS: cformat = DXGI_FORMAT_BC2_UNORM; break; case DXGI_FORMAT_BC3_TYPELESS: cformat = DXGI_FORMAT_BC3_UNORM; break; case DXGI_FORMAT_BC4_TYPELESS: cformat = DXGI_FORMAT_BC4_UNORM; break; case DXGI_FORMAT_BC5_TYPELESS: cformat = DXGI_FORMAT_BC5_UNORM; break; case DXGI_FORMAT_BC6H_TYPELESS: cformat = DXGI_FORMAT_BC6H_UF16; break; case DXGI_FORMAT_BC7_TYPELESS: cformat = DXGI_FORMAT_BC7_UNORM; break; default: cformat = cImage.format; break; } // Determine BC format decoder BC_DECODE pfDecode; size_t sbpp; switch(cformat) { case DXGI_FORMAT_BC1_UNORM: case DXGI_FORMAT_BC1_UNORM_SRGB: pfDecode = D3DXDecodeBC1; sbpp = 8; break; case DXGI_FORMAT_BC2_UNORM: case DXGI_FORMAT_BC2_UNORM_SRGB: pfDecode = D3DXDecodeBC2; sbpp = 16; break; case DXGI_FORMAT_BC3_UNORM: case DXGI_FORMAT_BC3_UNORM_SRGB: pfDecode = D3DXDecodeBC3; sbpp = 16; 
break; case DXGI_FORMAT_BC4_UNORM: pfDecode = D3DXDecodeBC4U; sbpp = 8; break; case DXGI_FORMAT_BC4_SNORM: pfDecode = D3DXDecodeBC4S; sbpp = 8; break; case DXGI_FORMAT_BC5_UNORM: pfDecode = D3DXDecodeBC5U; sbpp = 16; break; case DXGI_FORMAT_BC5_SNORM: pfDecode = D3DXDecodeBC5S; sbpp = 16; break; case DXGI_FORMAT_BC6H_UF16: pfDecode = D3DXDecodeBC6HU; sbpp = 16; break; case DXGI_FORMAT_BC6H_SF16: pfDecode = D3DXDecodeBC6HS; sbpp = 16; break; case DXGI_FORMAT_BC7_UNORM: case DXGI_FORMAT_BC7_UNORM_SRGB: pfDecode = D3DXDecodeBC7; sbpp = 16; break; default: return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); } XMVECTOR temp[16]; const uint8_t *pSrc = cImage.pixels; const size_t rowPitch = result.rowPitch; for( size_t h=0; h < cImage.height; h += 4 ) { const uint8_t *sptr = pSrc; uint8_t* dptr = pDest; for( size_t count = 0; count < cImage.rowPitch; count += sbpp ) { pfDecode( temp, sptr ); _ConvertScanline( temp, 16, format, cformat, 0 ); if ( !_StoreScanline( dptr, rowPitch, format, &temp[0], 4 ) ) return E_FAIL; if ( result.height > 1 ) { if ( !_StoreScanline( dptr + rowPitch, rowPitch, format, &temp[4], 4 ) ) return E_FAIL; if ( result.height > 2 ) { if ( !_StoreScanline( dptr + rowPitch*2, rowPitch, format, &temp[8], 4 ) ) return E_FAIL; if ( !_StoreScanline( dptr + rowPitch*3, rowPitch, format, &temp[12], 4 ) ) return E_FAIL; } } sptr += sbpp; dptr += dbpp*4; } pSrc += cImage.rowPitch; pDest += rowPitch*4; } return S_OK; }
//-------------------------------------------------------------------------------------
// OpenMP variant of the block encoder: every 4x4 block is encoded in its own loop
// iteration.  Width and height are asserted to be multiples of 4 (no degenerate or
// partial blocks).
//
//  image    - source image; pixels must be non-null
//  result   - destination image (BC1-BC7 format); pixels must be non-null
//  bcflags  - flags forwarded unchanged to the block encoder
//  alphaRef - alpha reference forwarded to D3DXEncodeBC1 (BC1 formats only)
//
// Returns S_OK on success, E_POINTER for a null pixel pointer, E_FAIL when the source
// bits-per-pixel is zero or any scanline load failed, and
// HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ) for sub-byte sources or a non-BC target.
//-------------------------------------------------------------------------------------
static HRESULT _CompressBC_Parallel( _In_ const Image& image, _In_ const Image& result, _In_ DWORD bcflags,
                                     _In_ float alphaRef )
{
    if ( !image.pixels || !result.pixels )
        return E_POINTER;

    // Parallel version doesn't support degenerate case
    assert( ((image.width % 4) == 0) && ((image.height % 4) == 0 ) );

    assert( image.width == result.width );
    assert( image.height == result.height );

    const DXGI_FORMAT format = image.format;

    const size_t srcBits = BitsPerPixel( format );
    if ( !srcBits )
        return E_FAIL;

    if ( srcBits < 8 )
    {
        // We don't support compressing from monochrome (DXGI_FORMAT_R1_UNORM)
        return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
    }

    // Source bytes per pixel, rounded up
    const size_t srcBytes = ( srcBits + 7 ) / 8;

    // Pick the encoder and the encoded block size; a null encoder means BC1, which
    // has its own entry point taking the alpha reference
    BC_ENCODE encoder = nullptr;
    size_t blockBytes = 0;
    switch( result.format )
    {
    case DXGI_FORMAT_BC1_UNORM:
    case DXGI_FORMAT_BC1_UNORM_SRGB:
        encoder = nullptr;
        blockBytes = 8;
        break;

    case DXGI_FORMAT_BC2_UNORM:
    case DXGI_FORMAT_BC2_UNORM_SRGB:
        encoder = D3DXEncodeBC2;
        blockBytes = 16;
        break;

    case DXGI_FORMAT_BC3_UNORM:
    case DXGI_FORMAT_BC3_UNORM_SRGB:
        encoder = D3DXEncodeBC3;
        blockBytes = 16;
        break;

    case DXGI_FORMAT_BC4_UNORM:
        encoder = D3DXEncodeBC4U;
        blockBytes = 8;
        break;

    case DXGI_FORMAT_BC4_SNORM:
        encoder = D3DXEncodeBC4S;
        blockBytes = 8;
        break;

    case DXGI_FORMAT_BC5_UNORM:
        encoder = D3DXEncodeBC5U;
        blockBytes = 16;
        break;

    case DXGI_FORMAT_BC5_SNORM:
        encoder = D3DXEncodeBC5S;
        blockBytes = 16;
        break;

    case DXGI_FORMAT_BC6H_UF16:
        encoder = D3DXEncodeBC6HU;
        blockBytes = 16;
        break;

    case DXGI_FORMAT_BC6H_SF16:
        encoder = D3DXEncodeBC6HS;
        blockBytes = 16;
        break;

    case DXGI_FORMAT_BC7_UNORM:
    case DXGI_FORMAT_BC7_UNORM_SRGB:
        encoder = D3DXEncodeBC7;
        blockBytes = 16;
        break;

    default:
        return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
    }

    // Refactored version of loop to support parallel independence
    const size_t blocksAcross = std::max<size_t>(1, image.width / 4);
    const size_t blocksDown = std::max<size_t>(1, image.height / 4);
    const size_t nBlocks = blocksAcross * blocksDown;
    const size_t rowPitch = image.rowPitch;

    bool fail = false;

#pragma omp parallel for
    for( int nb = 0; nb < static_cast<int>( nBlocks ); ++nb )
    {
        // Split the linear block index into block row (y) and block column (x)
        const size_t index = static_cast<size_t>( nb );
        const size_t y = index / blocksAcross;
        const size_t x = index - (y*blocksAcross);

        assert( x < image.width && y < image.height );

        const uint8_t *blockSrc = image.pixels + (y*4*rowPitch) + (x*4*srcBytes);
        uint8_t *blockDst = result.pixels + (index*blockBytes);

        // Load the four scanlines of this block; a failure is only recorded, the
        // iteration still runs to completion
        XMVECTOR texels[16];
        for( size_t row = 0; row < 4; ++row )
        {
            if ( !_LoadScanline( &texels[row*4], 4, blockSrc + rowPitch*row, rowPitch, format ) )
                fail = true;
        }

        _ConvertScanline( texels, 16, result.format, format, 0 );

        if ( encoder )
        {
            encoder( blockDst, texels, bcflags );
        }
        else
        {
            D3DXEncodeBC1( blockDst, texels, alphaRef, bcflags );
        }
    }

    return fail ? E_FAIL : S_OK;
}
//------------------------------------------------------------------------------------- static HRESULT _CompressBC( _In_ const Image& image, _In_ const Image& result, _In_ DWORD bcflags, _In_ DWORD srgb, _In_ float alphaRef ) { if ( !image.pixels || !result.pixels ) return E_POINTER; assert( image.width == result.width ); assert( image.height == result.height ); const DXGI_FORMAT format = image.format; size_t sbpp = BitsPerPixel( format ); if ( !sbpp ) return E_FAIL; if ( sbpp < 8 ) { // We don't support compressing from monochrome (DXGI_FORMAT_R1_UNORM) return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); } // Round to bytes sbpp = ( sbpp + 7 ) / 8; uint8_t *pDest = result.pixels; // Determine BC format encoder BC_ENCODE pfEncode; size_t blocksize; DWORD cflags; if ( !_DetermineEncoderSettings( result.format, pfEncode, blocksize, cflags ) ) return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); XMVECTOR temp[16]; const uint8_t *pSrc = image.pixels; const size_t rowPitch = image.rowPitch; for( size_t h=0; h < image.height; h += 4 ) { const uint8_t *sptr = pSrc; uint8_t* dptr = pDest; size_t ph = std::min<size_t>( 4, image.height - h ); size_t w = 0; for( size_t count = 0; count < rowPitch; count += sbpp*4, w += 4 ) { size_t pw = std::min<size_t>( 4, image.width - w ); assert( pw > 0 && ph > 0 ); if ( !_LoadScanline( &temp[0], pw, sptr, rowPitch, format ) ) return E_FAIL; if ( ph > 1 ) { if ( !_LoadScanline( &temp[4], pw, sptr + rowPitch, rowPitch, format ) ) return E_FAIL; if ( ph > 2 ) { if ( !_LoadScanline( &temp[8], pw, sptr + rowPitch*2, rowPitch, format ) ) return E_FAIL; if ( ph > 3 ) { if ( !_LoadScanline( &temp[12], pw, sptr + rowPitch*3, rowPitch, format ) ) return E_FAIL; } } } if ( pw != 4 || ph != 4 ) { // Replicate pixels for partial block static const size_t uSrc[] = { 0, 0, 0, 1 }; if ( pw < 4 ) { for( size_t t = 0; t < ph && t < 4; ++t ) { for( size_t s = pw; s < 4; ++s ) { #pragma prefast(suppress: 26000, "PREFAST false positive") temp[ (t << 2) | s ] = 
temp[ (t << 2) | uSrc[s] ]; } } } if ( ph < 4 ) { for( size_t t = ph; t < 4; ++t ) { for( size_t s = 0; s < 4; ++s ) { #pragma prefast(suppress: 26000, "PREFAST false positive") temp[ (t << 2) | s ] = temp[ (uSrc[t] << 2) | s ]; } } } } _ConvertScanline( temp, 16, result.format, format, cflags | srgb ); if ( pfEncode ) pfEncode( dptr, temp, bcflags ); else D3DXEncodeBC1( dptr, temp, alphaRef, bcflags ); sptr += sbpp*4; dptr += blocksize; } pSrc += rowPitch*4; pDest += result.rowPitch; } return S_OK; }
//------------------------------------------------------------------------------------- static HRESULT _DecompressBC( _In_ const Image& cImage, _In_ const Image& result ) { if ( !cImage.pixels || !result.pixels ) return E_POINTER; assert( cImage.width == result.width ); assert( cImage.height == result.height ); const DXGI_FORMAT format = result.format; size_t dbpp = BitsPerPixel( format ); if ( !dbpp ) return E_FAIL; if ( dbpp < 8 ) { // We don't support decompressing to monochrome (DXGI_FORMAT_R1_UNORM) return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); } // Round to bytes dbpp = ( dbpp + 7 ) / 8; uint8_t *pDest = result.pixels; if ( !pDest ) return E_POINTER; // Promote "typeless" BC formats DXGI_FORMAT cformat; switch( cImage.format ) { case DXGI_FORMAT_BC1_TYPELESS: cformat = DXGI_FORMAT_BC1_UNORM; break; case DXGI_FORMAT_BC2_TYPELESS: cformat = DXGI_FORMAT_BC2_UNORM; break; case DXGI_FORMAT_BC3_TYPELESS: cformat = DXGI_FORMAT_BC3_UNORM; break; case DXGI_FORMAT_BC4_TYPELESS: cformat = DXGI_FORMAT_BC4_UNORM; break; case DXGI_FORMAT_BC5_TYPELESS: cformat = DXGI_FORMAT_BC5_UNORM; break; case DXGI_FORMAT_BC6H_TYPELESS: cformat = DXGI_FORMAT_BC6H_UF16; break; case DXGI_FORMAT_BC7_TYPELESS: cformat = DXGI_FORMAT_BC7_UNORM; break; default: cformat = cImage.format; break; } // Determine BC format decoder BC_DECODE pfDecode; size_t sbpp; switch(cformat) { case DXGI_FORMAT_BC1_UNORM: case DXGI_FORMAT_BC1_UNORM_SRGB: pfDecode = D3DXDecodeBC1; sbpp = 8; break; case DXGI_FORMAT_BC2_UNORM: case DXGI_FORMAT_BC2_UNORM_SRGB: pfDecode = D3DXDecodeBC2; sbpp = 16; break; case DXGI_FORMAT_BC3_UNORM: case DXGI_FORMAT_BC3_UNORM_SRGB: pfDecode = D3DXDecodeBC3; sbpp = 16; break; case DXGI_FORMAT_BC4_UNORM: pfDecode = D3DXDecodeBC4U; sbpp = 8; break; case DXGI_FORMAT_BC4_SNORM: pfDecode = D3DXDecodeBC4S; sbpp = 8; break; case DXGI_FORMAT_BC5_UNORM: pfDecode = D3DXDecodeBC5U; sbpp = 16; break; case DXGI_FORMAT_BC5_SNORM: pfDecode = D3DXDecodeBC5S; sbpp = 16; break; case 
DXGI_FORMAT_BC6H_UF16: pfDecode = D3DXDecodeBC6HU; sbpp = 16; break; case DXGI_FORMAT_BC6H_SF16: pfDecode = D3DXDecodeBC6HS; sbpp = 16; break; case DXGI_FORMAT_BC7_UNORM: case DXGI_FORMAT_BC7_UNORM_SRGB: pfDecode = D3DXDecodeBC7; sbpp = 16; break; default: return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); } XMVECTOR temp[16]; const uint8_t *pSrc = cImage.pixels; const size_t rowPitch = result.rowPitch; for( size_t h=0; h < cImage.height; h += 4 ) { const uint8_t *sptr = pSrc; uint8_t* dptr = pDest; size_t ph = std::min<size_t>( 4, cImage.height - h ); size_t w = 0; for( size_t count = 0; count < cImage.rowPitch; count += sbpp, w += 4 ) { pfDecode( temp, sptr ); _ConvertScanline( temp, 16, format, cformat, 0 ); size_t pw = std::min<size_t>( 4, cImage.width - w ); assert( pw > 0 && ph > 0 ); if ( !_StoreScanline( dptr, rowPitch, format, &temp[0], pw ) ) return E_FAIL; if ( ph > 1 ) { if ( !_StoreScanline( dptr + rowPitch, rowPitch, format, &temp[4], pw ) ) return E_FAIL; if ( ph > 2 ) { if ( !_StoreScanline( dptr + rowPitch*2, rowPitch, format, &temp[8], pw ) ) return E_FAIL; if ( ph > 3 ) { if ( !_StoreScanline( dptr + rowPitch*3, rowPitch, format, &temp[12], pw ) ) return E_FAIL; } } } sptr += sbpp; dptr += dbpp*4; } pSrc += cImage.rowPitch; pDest += rowPitch*4; } return S_OK; }
//-------------------------------------------------------------------------------------
// OpenMP variant of the block encoder: every 4x4 block is encoded in its own loop
// iteration.  Blocks on the right/bottom edge that are only partially covered by the
// image are completed by replicating texels that were loaded.
//
//  image    - source image; pixels must be non-null
//  result   - destination image; pixels must be non-null, format selects the encoder
//  bcflags  - flags forwarded unchanged to the block encoder
//  srgb     - flags OR'ed with the encoder's conversion flags for _ConvertScanline
//  alphaRef - alpha reference forwarded to D3DXEncodeBC1 (used when the encoder
//             returned by _DetermineEncoderSettings is null)
//
// Returns S_OK on success, E_POINTER for a null pixel pointer, E_FAIL when the source
// bits-per-pixel is zero or any scanline load failed, and
// HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ) for sub-byte sources or when
// _DetermineEncoderSettings rejects the target format.
//
// NOTE: encoded blocks are written at result.pixels + nb*blocksize; result.rowPitch
// is not consulted here.
//-------------------------------------------------------------------------------------
static HRESULT _CompressBC_Parallel( _In_ const Image& image, _In_ const Image& result, _In_ DWORD bcflags,
                                     _In_ DWORD srgb, _In_ float alphaRef )
{
    if ( !image.pixels || !result.pixels )
        return E_POINTER;

    assert( image.width == result.width );
    assert( image.height == result.height );

    const DXGI_FORMAT format = image.format;
    size_t sbpp = BitsPerPixel( format );
    if ( !sbpp )
        return E_FAIL;

    if ( sbpp < 8 )
    {
        // We don't support compressing from monochrome (DXGI_FORMAT_R1_UNORM)
        return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
    }

    // Round to bytes
    sbpp = ( sbpp + 7 ) / 8;

    // Determine BC format encoder
    BC_ENCODE pfEncode;
    size_t blocksize;
    DWORD cflags;
    if ( !_DetermineEncoderSettings( result.format, pfEncode, blocksize, cflags ) )
        return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );

    // Refactored version of loop to support parallel independence
    const size_t nbWidth = std::max<size_t>(1, (image.width + 3) / 4 );
    const size_t nbHeight = std::max<size_t>(1, (image.height + 3) / 4 );
    const size_t nBlocks = nbWidth * nbHeight;
    const size_t rowPitch = image.rowPitch;

    bool fail = false;

#pragma omp parallel for
    for( int nb = 0; nb < static_cast<int>( nBlocks ); ++nb )
    {
        // Split the linear block index into block row / block column, then convert to
        // the pixel coordinates of the block's top-left texel
        const size_t index = static_cast<size_t>( nb );
        const size_t by = index / nbWidth;
        const size_t bx = index - (by*nbWidth);
        const size_t x = bx * 4;
        const size_t y = by * 4;

        // Only reachable for a zero-sized dimension (the block counts are clamped to at
        // least 1); skipping keeps the subtractions below from wrapping around
        if ( x >= image.width || y >= image.height )
            continue;

        assert( x < image.width && y < image.height );

        const uint8_t *pSrc = image.pixels + (y*rowPitch) + (x*sbpp);
        uint8_t *pDest = result.pixels + (index*blocksize);

        // Valid rows/columns of this block, measured from its PIXEL origin.  Using the
        // block indices here would report 4 for edge blocks of images whose size is not
        // a multiple of 4 and read past the end of the image.
        const size_t ph = std::min<size_t>( 4, image.height - y );
        const size_t pw = std::min<size_t>( 4, image.width - x );
        assert( pw > 0 && ph > 0 );

        // A load failure is only recorded; the iteration still runs to completion
        XMVECTOR temp[16];
        if ( !_LoadScanline( &temp[0], pw, pSrc, rowPitch, format ) )
            fail = true;

        if ( ph > 1 )
        {
            if ( !_LoadScanline( &temp[4], pw, pSrc + rowPitch, rowPitch, format ) )
                fail = true;

            if ( ph > 2 )
            {
                if ( !_LoadScanline( &temp[8], pw, pSrc + rowPitch*2, rowPitch, format ) )
                    fail = true;

                if ( ph > 3 )
                {
                    if ( !_LoadScanline( &temp[12], pw, pSrc + rowPitch*3, rowPitch, format ) )
                        fail = true;
                }
            }
        }

        if ( pw != 4 || ph != 4 )
        {
            // Replicate pixels for partial block: missing index i is copied from
            // index uSrc[i]
            static const size_t uSrc[] = { 0, 0, 0, 1 };

            if ( pw < 4 )
            {
                for( size_t t = 0; t < ph && t < 4; ++t )
                {
                    for( size_t s = pw; s < 4; ++s )
                    {
                        temp[ (t << 2) | s ] = temp[ (t << 2) | uSrc[s] ];
                    }
                }
            }

            if ( ph < 4 )
            {
                for( size_t t = ph; t < 4; ++t )
                {
                    for( size_t s = 0; s < 4; ++s )
                    {
                        temp[ (t << 2) | s ] = temp[ (uSrc[t] << 2) | s ];
                    }
                }
            }
        }

        _ConvertScanline( temp, 16, result.format, format, cflags | srgb );

        // A null encoder selects D3DXEncodeBC1, which additionally takes alphaRef
        if ( pfEncode )
            pfEncode( pDest, temp, bcflags );
        else
            D3DXEncodeBC1( pDest, temp, alphaRef, bcflags );
    }

    return (fail) ? E_FAIL : S_OK;
}