void ColorBlockCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) { nvDebugCheck(d == 1); CompressorContext context; context.alphaMode = alphaMode; context.w = w; context.h = h; context.d = d; context.data = data; context.compressionOptions = &compressionOptions; context.bs = blockSize(); context.bw = (w + 3) / 4; context.bh = (h + 3) / 4; context.compressor = this; SequentialTaskDispatcher sequential; // Use a single thread to compress small textures. if (context.bh < 4) dispatcher = &sequential; #if _DEBUG dispatcher = &sequential; #endif const uint count = context.bw * context.bh; const uint size = context.bs * count; context.mem = new uint8[size]; dispatcher->dispatch(ColorBlockCompressorTask, &context, count); outputOptions.writeData(context.mem, size); delete [] context.mem; }
bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int h, int d, int mipmapCount, bool isNormalMap, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const { if (w <= 0 || h <= 0 || d <= 0 || mipmapCount <= 0) { outputOptions.error(Error_InvalidInput); return false; } if (!outputOptions.outputHeader) { return true; } // Output DDS header. if (outputOptions.container == Container_DDS || outputOptions.container == Container_DDS10) { DDSHeader header; header.setUserVersion(outputOptions.version); if (textureType == TextureType_2D) { header.setTexture2D(); } else if (textureType == TextureType_Cube) { header.setTextureCube(); } else if (textureType == TextureType_3D) { header.setTexture3D(); header.setDepth(d); } header.setWidth(w); header.setHeight(h); header.setMipmapCount(mipmapCount); bool supported = true; if (outputOptions.container == Container_DDS10) { if (compressionOptions.format == Format_RGBA) { const uint bitcount = compressionOptions.getBitCount(); if (bitcount == 16) { if (compressionOptions.rsize == 16) { header.setDX10Format(56); // R16_UNORM } else { // B5G6R5_UNORM // B5G5R5A1_UNORM supported = false; } } else if (bitcount == 32) { // B8G8R8A8_UNORM // B8G8R8X8_UNORM // R8G8B8A8_UNORM // R10G10B10A2_UNORM supported = false; } else { supported = false; } } else { if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) { header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC1_UNORM_SRGB : DXGI_FORMAT_BC1_UNORM); if (compressionOptions.format == Format_DXT1a) header.setHasAlphaFlag(true); if (isNormalMap) header.setNormalFlag(true); } else if (compressionOptions.format == Format_DXT3) { header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC2_UNORM_SRGB : DXGI_FORMAT_BC2_UNORM); } else if (compressionOptions.format == Format_DXT5 || compressionOptions.format == Format_BC3_RGBM) { header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC3_UNORM_SRGB : DXGI_FORMAT_BC3_UNORM); } else if (compressionOptions.format == Format_DXT5n) { header.setDX10Format(DXGI_FORMAT_BC3_UNORM); if (isNormalMap) header.setNormalFlag(true); } else if (compressionOptions.format == Format_BC4) { header.setDX10Format(DXGI_FORMAT_BC4_UNORM); // DXGI_FORMAT_BC4_SNORM ? } else if (compressionOptions.format == Format_BC5 || compressionOptions.format == Format_BC5_Luma) { header.setDX10Format(DXGI_FORMAT_BC5_UNORM); // DXGI_FORMAT_BC5_SNORM ? if (isNormalMap) header.setNormalFlag(true); } else if (compressionOptions.format == Format_BC6) { if (compressionOptions.pixelType == PixelType_Float) header.setDX10Format(DXGI_FORMAT_BC6H_SF16); /*if (compressionOptions.pixelType == PixelType_UnsignedFloat)*/ header.setDX10Format(DXGI_FORMAT_BC6H_UF16); // By default we assume unsigned. } else if (compressionOptions.format == Format_BC7) { header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC7_UNORM_SRGB : DXGI_FORMAT_BC7_UNORM); if (isNormalMap) header.setNormalFlag(true); } else if (compressionOptions.format == Format_CTX1) { supported = false; } else { supported = false; } } } else { if (compressionOptions.format == Format_RGBA) { // Get output bit count. header.setPitch(computeBytePitch(w, compressionOptions.getBitCount(), compressionOptions.pitchAlignment)); if (compressionOptions.pixelType == PixelType_Float) { if (compressionOptions.rsize == 16 && compressionOptions.gsize == 0 && compressionOptions.bsize == 0 && compressionOptions.asize == 0) { header.setFormatCode(111); // D3DFMT_R16F } else if (compressionOptions.rsize == 16 && compressionOptions.gsize == 16 && compressionOptions.bsize == 0 && compressionOptions.asize == 0) { header.setFormatCode(112); // D3DFMT_G16R16F } else if (compressionOptions.rsize == 16 && compressionOptions.gsize == 16 && compressionOptions.bsize == 16 && compressionOptions.asize == 16) { header.setFormatCode(113); // D3DFMT_A16B16G16R16F } else if (compressionOptions.rsize == 32 && compressionOptions.gsize == 0 && compressionOptions.bsize == 0 && compressionOptions.asize == 0) { header.setFormatCode(114); // D3DFMT_R32F } else if (compressionOptions.rsize == 32 && compressionOptions.gsize == 32 && compressionOptions.bsize == 0 && compressionOptions.asize == 0) { header.setFormatCode(115); // D3DFMT_G32R32F } else if (compressionOptions.rsize == 32 && compressionOptions.gsize == 32 && compressionOptions.bsize == 32 && compressionOptions.asize == 32) { header.setFormatCode(116); // D3DFMT_A32B32G32R32F } else { supported = false; } } else // Fixed point { const uint bitcount = compressionOptions.getBitCount(); if (compressionOptions.bitcount != 0) { // Masks already computed. header.setPixelFormat(compressionOptions.bitcount, compressionOptions.rmask, compressionOptions.gmask, compressionOptions.bmask, compressionOptions.amask); } else if (bitcount <= 32) { // Compute pixel format masks. const uint ashift = 0; const uint bshift = ashift + compressionOptions.asize; const uint gshift = bshift + compressionOptions.bsize; const uint rshift = gshift + compressionOptions.gsize; const uint rmask = ((1 << compressionOptions.rsize) - 1) << rshift; const uint gmask = ((1 << compressionOptions.gsize) - 1) << gshift; const uint bmask = ((1 << compressionOptions.bsize) - 1) << bshift; const uint amask = ((1 << compressionOptions.asize) - 1) << ashift; header.setPixelFormat(bitcount, rmask, gmask, bmask, amask); } else { supported = false; } } } else { header.setLinearSize(computeImageSize(w, h, d, compressionOptions.bitcount, compressionOptions.pitchAlignment, compressionOptions.format)); if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) { header.setFourCC('D', 'X', 'T', '1'); if (isNormalMap) header.setNormalFlag(true); } else if (compressionOptions.format == Format_DXT3) { header.setFourCC('D', 'X', 'T', '3'); } else if (compressionOptions.format == Format_DXT5 || compressionOptions.format == Format_BC3_RGBM) { header.setFourCC('D', 'X', 'T', '5'); } else if (compressionOptions.format == Format_DXT5n) { header.setFourCC('D', 'X', 'T', '5'); if (isNormalMap) { header.setNormalFlag(true); header.setSwizzleCode('A', '2', 'D', '5'); //header.setSwizzleCode('x', 'G', 'x', 'R'); } } else if (compressionOptions.format == Format_BC4) { header.setFourCC('A', 'T', 'I', '1'); } else if (compressionOptions.format == Format_BC5 || compressionOptions.format == Format_BC5_Luma) { header.setFourCC('A', 'T', 'I', '2'); if (isNormalMap) { header.setNormalFlag(true); header.setSwizzleCode('A', '2', 'X', 'Y'); } } else if (compressionOptions.format == Format_BC6) { header.setFourCC('Z', 'O', 'H', ' '); // This is not supported by D3DX. Always use DX10 header with BC6-7 formats. supported = false; } else if (compressionOptions.format == Format_BC7) { header.setFourCC('Z', 'O', 'L', 'A'); // This is not supported by D3DX. Always use DX10 header with BC6-7 formats. if (isNormalMap) header.setNormalFlag(true); supported = false; } else if (compressionOptions.format == Format_CTX1) { header.setFourCC('C', 'T', 'X', '1'); if (isNormalMap) header.setNormalFlag(true); } else { supported = false; } } if (outputOptions.srgb) header.setSrgbFlag(true); } if (!supported) { // This container does not support the requested format. outputOptions.error(Error_UnsupportedOutputFormat); return false; } uint headerSize = 128; if (header.hasDX10Header()) { nvStaticCheck(sizeof(DDSHeader) == 128 + 20); headerSize = 128 + 20; } // Swap bytes if necessary. header.swapBytes(); bool writeSucceed = outputOptions.writeData(&header, headerSize); if (!writeSucceed) { outputOptions.error(Error_FileWrite); } return writeSucceed; } return true; }
/// Compress image using CUDA. void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) { nvDebugCheck(cuda::isHardwarePresent()); #if defined HAVE_CUDA // Image size in blocks. const uint w = (m_image->width() + 3) / 4; const uint h = (m_image->height() + 3) / 4; uint imageSize = w * h * 16 * sizeof(Color32); uint * blockLinearImage = (uint *) malloc(imageSize); convertToBlockLinear(m_image, blockLinearImage); const uint blockNum = w * h; const uint compressedSize = blockNum * 8; AlphaBlockDXT5 * alphaBlocks = NULL; alphaBlocks = (AlphaBlockDXT5 *)malloc(min(compressedSize, MAX_BLOCKS * 8U)); setupCompressKernel(compressionOptions.colorWeight.ptr()); clock_t start = clock(); uint bn = 0; while(bn != blockNum) { uint count = min(blockNum - bn, MAX_BLOCKS); cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); // Launch kernel. if (m_alphaMode == AlphaMode_Transparency) { compressWeightedKernelDXT1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable); } else { compressKernelDXT1_Level4(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable); } // Compress alpha in parallel with the GPU. for (uint i = 0; i < count; i++) { ColorBlock rgba(blockLinearImage + (bn + i) * 16); QuickCompress::compressDXT5A(rgba, alphaBlocks + i); } // Check for errors. cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { nvDebug("CUDA Error: %s\n", cudaGetErrorString(err)); outputOptions.error(Error_CudaError); } // Copy result to host, overwrite swizzled image. cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost); // Output result. for (uint i = 0; i < count; i++) { outputOptions.writeData(alphaBlocks + i, 8); outputOptions.writeData(blockLinearImage + i * 2, 8); } bn += count; } clock_t end = clock(); //printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); free(alphaBlocks); free(blockLinearImage); #else outputOptions.error(Error_CudaError); #endif }