Exemplo n.º 1
0
/// Compress one image (a single face / mipmap level) and send it to the output.
///
/// The image is bracketed by outputOptions.beginImage() / endImage(); endImage() is called
/// even when no compressor is available, so the output always sees a matched pair.
///
/// @param alphaMode           Alpha mode forwarded to the selected compressor.
/// @param w, h, d             Image extents, forwarded to computeImageSize() and to the compressor.
/// @param face, mipmap        Indices reported to the output through beginImage().
/// @param rgba                Floating point source data, passed through to the compressor unchanged.
/// @param compressionOptions  Selects the compressor and the output size computation.
/// @param outputOptions       Receives the image notifications, the data and any error.
/// @return true when a compressor was found and invoked; false when the requested format is not
///         supported (Error_UnsupportedFeature is reported in that case).
bool Compressor::Private::compress(AlphaMode alphaMode, int w, int h, int d, int face, int mipmap, const float * rgba, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
    int size = computeImageSize(w, h, d, compressionOptions.getBitCount(), compressionOptions.pitchAlignment, compressionOptions.format);
    outputOptions.beginImage(size, w, h, d, face, mipmap);

    // Decide what compressor to use.
    AutoPtr<CompressorInterface> compressor;
#if defined HAVE_CUDA
    // The GPU path is only tried when CUDA is enabled and the image has at least 512 texels per slice.
    if (cudaEnabled && w * h >= 512)
    {
        compressor = chooseGpuCompressor(compressionOptions);
    }
#endif
    // Fall back to the CPU when the GPU path was skipped or produced no compressor.
    if (compressor == NULL)
    {
        compressor = chooseCpuCompressor(compressionOptions);
    }

    bool succeeded = false;

    if (compressor == NULL)
    {
        outputOptions.error(Error_UnsupportedFeature);
    }
    else
    {
        compressor->compress(alphaMode, w, h, d, rgba, dispatcher, compressionOptions, outputOptions);
        succeeded = true;
    }

    // Close the image opened by beginImage() regardless of the outcome.
    outputOptions.endImage();

    // Do not claim success after reporting Error_UnsupportedFeature.
    return succeeded;
}
Exemplo n.º 2
0
/// Compress every face described by inputOptions and write the result through outputOptions.
///
/// The output is opened with openFile() and is closed with closeFile() on every path that
/// follows a successful open, including the failure paths of outputHeader() and compressMipmaps().
///
/// @param inputOptions        Source description; its target extents are computed here.
/// @param compressionOptions  Compression settings forwarded to the header and mipmap stages.
/// @param outputOptions       Output destination and optional error handler.
/// @return true when the header and all faces were written; false when the output could not be
///         opened (Error_FileOpen is reported if an error handler is set), or when the header or
///         any face failed.
bool Compressor::Private::compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
	// Make sure enums match.
	nvStaticCheck(FloatImage::WrapMode_Clamp == (FloatImage::WrapMode)WrapMode_Clamp);
	nvStaticCheck(FloatImage::WrapMode_Mirror == (FloatImage::WrapMode)WrapMode_Mirror);
	nvStaticCheck(FloatImage::WrapMode_Repeat == (FloatImage::WrapMode)WrapMode_Repeat);

	// Get output handler.
	if (!outputOptions.openFile())
	{
		if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_FileOpen);
		return false;
	}

	inputOptions.computeTargetExtents();

	// Output DDS header.
	bool succeeded = outputHeader(inputOptions, compressionOptions, outputOptions);

	// Compress the faces in order, stopping at the first failure.
	for (uint f = 0; succeeded && f < inputOptions.faceCount; f++)
	{
		succeeded = compressMipmaps(f, inputOptions, compressionOptions, outputOptions);
	}

	// Always release the output, so that a failed run does not leave it open.
	outputOptions.closeFile();

	return succeeded;
}
/// Compress a single-slice image in 4x4 blocks and write the compressed blocks to the output.
///
/// A CompressorContext describing the job is filled in, one task per block is dispatched to
/// ColorBlockCompressorTask, and the resulting buffer is handed to outputOptions.writeData().
///
/// @param alphaMode           Stored in the context for the per-block tasks.
/// @param w, h                Image extents; the block grid is (w+3)/4 by (h+3)/4.
/// @param d                   Must be 1 (checked in debug builds).
/// @param data                Source data pointer, stored in the context unchanged.
/// @param dispatcher          Dispatcher used for large images; replaced by a sequential one for
///                            images fewer than 4 block rows high and in _DEBUG builds.
/// @param compressionOptions  Stored in the context by address.
/// @param outputOptions       Receives the compressed data.
void ColorBlockCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    nvDebugCheck(d == 1);

    // Describe the job for the per-block tasks.
    CompressorContext ctx;
    ctx.compressor = this;
    ctx.compressionOptions = &compressionOptions;
    ctx.alphaMode = alphaMode;
    ctx.data = data;
    ctx.w = w;
    ctx.h = h;
    ctx.d = d;

    // Block grid dimensions (partial blocks round up) and bytes per block.
    ctx.bw = (w + 3) / 4;
    ctx.bh = (h + 3) / 4;
    ctx.bs = blockSize();

    // Pick the dispatcher: small textures, and every texture in debug builds, run on one thread.
    SequentialTaskDispatcher singleThreaded;
    TaskDispatcher * activeDispatcher = dispatcher;
#if _DEBUG
    activeDispatcher = &singleThreaded;
#else
    if (ctx.bh < 4)
    {
        activeDispatcher = &singleThreaded;
    }
#endif

    // One output slot of ctx.bs bytes per block.
    const uint blockCount = ctx.bw * ctx.bh;
    const uint byteCount = ctx.bs * blockCount;
    ctx.mem = new uint8[byteCount];

    activeDispatcher->dispatch(ColorBlockCompressorTask, &ctx, blockCount);

    outputOptions.writeData(ctx.mem, byteCount);

    delete [] ctx.mem;
}
Exemplo n.º 4
0
// Build the bit mask of a channel that is 'size' bits wide and starts at bit 'shift' of a pixel
// of at most 32 bits. Every shift performed here is by less than 32 bits, so the result is
// well defined even for a 32 bit channel or for an absent channel placed at bit 32.
static unsigned int ddsChannelMask(unsigned int size, unsigned int shift)
{
    if (size == 0 || shift >= 32) return 0;
    if (size >= 32) return 0xFFFFFFFFu << shift;
    return ((1u << size) - 1u) << shift;
}

/// Build the DDS header for the requested texture and write it to the output.
///
/// Nothing is written when outputOptions.outputHeader is false or when the container is neither
/// Container_DDS nor Container_DDS10; both cases count as success.
///
/// @param textureType         2D, cube or 3D; depth is only stored in the header for 3D textures.
/// @param w, h, d             Texture extents; all must be positive.
/// @param mipmapCount         Number of mipmap levels; must be positive.
/// @param isNormalMap         Sets the normal map flag (and swizzle codes) for the formats that support it.
/// @param compressionOptions  Format, pixel type and channel layout to describe in the header.
/// @param outputOptions       Container selection, sRGB flag, user version and output destination.
/// @return true on success; false after reporting Error_InvalidInput (bad extents),
///         Error_UnsupportedOutputFormat (format cannot be expressed in the container) or
///         Error_FileWrite (the write failed).
bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int h, int d, int mipmapCount, bool isNormalMap, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
    if (w <= 0 || h <= 0 || d <= 0 || mipmapCount <= 0)
    {
        outputOptions.error(Error_InvalidInput);
        return false;
    }

    if (!outputOptions.outputHeader)
    {
        return true;
    }

    // Output DDS header.
    if (outputOptions.container == Container_DDS || outputOptions.container == Container_DDS10)
    {
        DDSHeader header;

        header.setUserVersion(outputOptions.version);

        if (textureType == TextureType_2D) {
            header.setTexture2D();
        }
        else if (textureType == TextureType_Cube) {
            header.setTextureCube();
        }
        else if (textureType == TextureType_3D) {
            header.setTexture3D();
            header.setDepth(d);
        }

        header.setWidth(w);
        header.setHeight(h);
        header.setMipmapCount(mipmapCount);

        // Cleared by any branch below that cannot express the requested format in this container.
        bool supported = true;

        if (outputOptions.container == Container_DDS10)
        {
            if (compressionOptions.format == Format_RGBA)
            {
                const uint bitcount = compressionOptions.getBitCount();

                if (bitcount == 16)
                {
                    if (compressionOptions.rsize == 16)
                    {
                        header.setDX10Format(56); // R16_UNORM
                    }
                    else
                    {
                        // B5G6R5_UNORM
                        // B5G5R5A1_UNORM
                        supported = false;
                    }
                }
                else if (bitcount == 32)
                {
                    // B8G8R8A8_UNORM
                    // B8G8R8X8_UNORM
                    // R8G8B8A8_UNORM
                    // R10G10B10A2_UNORM
                    supported = false;
                }
                else {
                    supported = false;
                }
            }
            else
            {
                if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) {
                    header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC1_UNORM_SRGB : DXGI_FORMAT_BC1_UNORM);
                    if (compressionOptions.format == Format_DXT1a) header.setHasAlphaFlag(true);
                    if (isNormalMap) header.setNormalFlag(true);
                }
                else if (compressionOptions.format == Format_DXT3) {
                    header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC2_UNORM_SRGB : DXGI_FORMAT_BC2_UNORM);
                }
                else if (compressionOptions.format == Format_DXT5 || compressionOptions.format == Format_BC3_RGBM) {
                    header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC3_UNORM_SRGB : DXGI_FORMAT_BC3_UNORM);
                }
                else if (compressionOptions.format == Format_DXT5n) {
                    header.setDX10Format(DXGI_FORMAT_BC3_UNORM);
                    if (isNormalMap) header.setNormalFlag(true);
                }
                else if (compressionOptions.format == Format_BC4) {
                    header.setDX10Format(DXGI_FORMAT_BC4_UNORM); // DXGI_FORMAT_BC4_SNORM ?
                }
                else if (compressionOptions.format == Format_BC5 || compressionOptions.format == Format_BC5_Luma) {
                    header.setDX10Format(DXGI_FORMAT_BC5_UNORM); // DXGI_FORMAT_BC5_SNORM ?
                    if (isNormalMap) header.setNormalFlag(true);
                }
                else if (compressionOptions.format == Format_BC6) {
                    // Signed only when explicitly requested; the two choices are mutually exclusive,
                    // so the signed format must not be overwritten by the unsigned default.
                    if (compressionOptions.pixelType == PixelType_Float) {
                        header.setDX10Format(DXGI_FORMAT_BC6H_SF16);
                    }
                    else {
                        header.setDX10Format(DXGI_FORMAT_BC6H_UF16); // By default we assume unsigned.
                    }
                }
                else if (compressionOptions.format == Format_BC7) {
                    header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC7_UNORM_SRGB : DXGI_FORMAT_BC7_UNORM);
                    if (isNormalMap) header.setNormalFlag(true);
                }
                else if (compressionOptions.format == Format_CTX1) {
                    supported = false;
                }
                else {
                    supported = false;
                }
            }
        }
        else
        {
            if (compressionOptions.format == Format_RGBA)
            {
                // Get output bit count.
                header.setPitch(computeBytePitch(w, compressionOptions.getBitCount(), compressionOptions.pitchAlignment));

                if (compressionOptions.pixelType == PixelType_Float)
                {
                    if (compressionOptions.rsize == 16 && compressionOptions.gsize == 0 && compressionOptions.bsize == 0 && compressionOptions.asize == 0)
                    {
                        header.setFormatCode(111); // D3DFMT_R16F
                    }
                    else if (compressionOptions.rsize == 16 && compressionOptions.gsize == 16 && compressionOptions.bsize == 0 && compressionOptions.asize == 0)
                    {
                        header.setFormatCode(112); // D3DFMT_G16R16F
                    }
                    else if (compressionOptions.rsize == 16 && compressionOptions.gsize == 16 && compressionOptions.bsize == 16 && compressionOptions.asize == 16)
                    {
                        header.setFormatCode(113); // D3DFMT_A16B16G16R16F
                    }
                    else if (compressionOptions.rsize == 32 && compressionOptions.gsize == 0 && compressionOptions.bsize == 0 && compressionOptions.asize == 0)
                    {
                        header.setFormatCode(114); // D3DFMT_R32F
                    }
                    else if (compressionOptions.rsize == 32 && compressionOptions.gsize == 32 && compressionOptions.bsize == 0 && compressionOptions.asize == 0)
                    {
                        header.setFormatCode(115); // D3DFMT_G32R32F
                    }
                    else if (compressionOptions.rsize == 32 && compressionOptions.gsize == 32 && compressionOptions.bsize == 32 && compressionOptions.asize == 32)
                    {
                        header.setFormatCode(116); // D3DFMT_A32B32G32R32F
                    }
                    else
                    {
                        supported = false;
                    }
                }
                else // Fixed point
                {
                    const uint bitcount = compressionOptions.getBitCount();

                    if (compressionOptions.bitcount != 0)
                    {
                        // Masks already computed.
                        header.setPixelFormat(compressionOptions.bitcount, compressionOptions.rmask, compressionOptions.gmask, compressionOptions.bmask, compressionOptions.amask);
                    }
                    else if (bitcount <= 32)
                    {
                        // Compute pixel format masks. Channels are packed from the least
                        // significant bit upwards in the order A, B, G, R.
                        const uint ashift = 0;
                        const uint bshift = ashift + compressionOptions.asize;
                        const uint gshift = bshift + compressionOptions.bsize;
                        const uint rshift = gshift + compressionOptions.gsize;

                        // The helper avoids the undefined shifts of the naive (1 << size) - 1 form
                        // when a channel is 32 bits wide or starts at bit 32.
                        const uint rmask = ddsChannelMask(compressionOptions.rsize, rshift);
                        const uint gmask = ddsChannelMask(compressionOptions.gsize, gshift);
                        const uint bmask = ddsChannelMask(compressionOptions.bsize, bshift);
                        const uint amask = ddsChannelMask(compressionOptions.asize, ashift);

                        header.setPixelFormat(bitcount, rmask, gmask, bmask, amask);
                    }
                    else
                    {
                        supported = false;
                    }
                }
            }
            else
            {
                header.setLinearSize(computeImageSize(w, h, d, compressionOptions.bitcount, compressionOptions.pitchAlignment, compressionOptions.format));

                if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) {
                    header.setFourCC('D', 'X', 'T', '1');
                    if (isNormalMap) header.setNormalFlag(true);
                }
                else if (compressionOptions.format == Format_DXT3) {
                    header.setFourCC('D', 'X', 'T', '3');
                }
                else if (compressionOptions.format == Format_DXT5 || compressionOptions.format == Format_BC3_RGBM) {
                    header.setFourCC('D', 'X', 'T', '5');
                }
                else if (compressionOptions.format == Format_DXT5n) {
                    header.setFourCC('D', 'X', 'T', '5');
                    if (isNormalMap) {
                        header.setNormalFlag(true);
                        header.setSwizzleCode('A', '2', 'D', '5');
                        //header.setSwizzleCode('x', 'G', 'x', 'R');
                    }
                }
                else if (compressionOptions.format == Format_BC4) {
                    header.setFourCC('A', 'T', 'I', '1');
                }
                else if (compressionOptions.format == Format_BC5 || compressionOptions.format == Format_BC5_Luma) {
                    header.setFourCC('A', 'T', 'I', '2');
                    if (isNormalMap) {
                        header.setNormalFlag(true);
                        header.setSwizzleCode('A', '2', 'X', 'Y');
                    }
                }
                else if (compressionOptions.format == Format_BC6) {
                    header.setFourCC('Z', 'O', 'H', ' ');               // This is not supported by D3DX. Always use DX10 header with BC6-7 formats.
                    supported = false;
                }
                else if (compressionOptions.format == Format_BC7) {
                    header.setFourCC('Z', 'O', 'L', 'A');               // This is not supported by D3DX. Always use DX10 header with BC6-7 formats.
                    if (isNormalMap) header.setNormalFlag(true);
                    supported = false;
                }
                else if (compressionOptions.format == Format_CTX1) {
                    header.setFourCC('C', 'T', 'X', '1');
                    if (isNormalMap) header.setNormalFlag(true);
                }
                else {
                    supported = false;
                }
            }

            if (outputOptions.srgb) header.setSrgbFlag(true);
        }

        if (!supported)
        {
            // This container does not support the requested format.
            outputOptions.error(Error_UnsupportedOutputFormat);
            return false;
        }

        // The basic header is 128 bytes; the DX10 extension adds 20 more.
        uint headerSize = 128;
        if (header.hasDX10Header())
        {
            nvStaticCheck(sizeof(DDSHeader) == 128 + 20);
            headerSize = 128 + 20;
        }

        // Swap bytes if necessary.
        header.swapBytes();

        bool writeSucceed = outputOptions.writeData(&header, headerSize);
        if (!writeSucceed)
        {
            outputOptions.error(Error_FileWrite);
        }

        return writeSucceed;
    }

    return true;
}
Exemplo n.º 5
0
/// Process and compress every face and mipmap level described by inputOptions.
///
/// The DDS header is written first. Then, for each face, the top level image is loaded,
/// optionally converted to a normal map, linearized (unless it is a normal map), resized to the
/// target extents, and compressed. Each further mipmap level is either loaded from the supplied
/// source image or generated from the previous level with the configured mipmap filter.
///
/// @param inputOptions        Source images and processing settings.
/// @param compressionOptions  Settings used for quantization and compression.
/// @param outputOptions       Output destination; must have a valid output handler.
/// @return false when there is no valid output handler (Error_FileOpen is reported) or when the
///         header could not be written; true otherwise.
bool Compressor::Private::compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
    // The public wrap mode enum is cast to the FloatImage one, so the values must agree.
    nvStaticCheck(FloatImage::WrapMode_Clamp == (FloatImage::WrapMode)WrapMode_Clamp);
    nvStaticCheck(FloatImage::WrapMode_Mirror == (FloatImage::WrapMode)WrapMode_Mirror);
    nvStaticCheck(FloatImage::WrapMode_Repeat == (FloatImage::WrapMode)WrapMode_Repeat);

    // Nothing can be written without an output handler.
    if (!outputOptions.hasValidOutputHandler())
    {
        outputOptions.error(Error_FileOpen);
        return false;
    }

    // Working surface, reused for every face and mipmap level.
    nvtt::Surface img;
    img.setWrapMode(inputOptions.wrapMode);
    img.setAlphaMode(inputOptions.alphaMode);
    img.setNormalMap(inputOptions.isNormalMap);

    const int faceCount = inputOptions.faceCount;

    // Extents of the top level after applying the maximum extent and rounding mode.
    int targetWidth = inputOptions.width;
    int targetHeight = inputOptions.height;
    int targetDepth = inputOptions.depth;
    nv::getTargetExtent(&targetWidth, &targetHeight, &targetDepth, inputOptions.maxExtent, inputOptions.roundMode, inputOptions.textureType);

    // Supplied mipmap images are only usable when the top level keeps its original extents.
    const bool extentsUnchanged =
        inputOptions.width == targetWidth &&
        inputOptions.height == targetHeight &&
        inputOptions.depth == targetDepth;

    int mipmapCount = 1;
    if (inputOptions.generateMipmaps)
    {
        mipmapCount = countMipmaps(targetWidth, targetHeight, targetDepth);
        if (inputOptions.maxLevel > 0)
        {
            mipmapCount = min(mipmapCount, inputOptions.maxLevel);
        }
    }

    if (!outputHeader(inputOptions.textureType, targetWidth, targetHeight, targetDepth, mipmapCount, img.isNormalMap(), compressionOptions, outputOptions))
    {
        return false;
    }

    // Output images, face by face.
    for (int face = 0; face < faceCount; face++)
    {
        int levelWidth = targetWidth;
        int levelHeight = targetHeight;
        int levelDepth = targetDepth;

        // Cleared for the rest of this face as soon as one source mipmap is missing.
        bool sourceMipmapsUsable = extentsUnchanged;

        img.setImage(inputOptions.inputFormat, inputOptions.width, inputOptions.height, inputOptions.depth, inputOptions.images[face]);

        // Height map to normal map conversion.
        if (inputOptions.convertToNormalMap)
        {
            img.toGreyScale(inputOptions.heightFactors.x, inputOptions.heightFactors.y, inputOptions.heightFactors.z, inputOptions.heightFactors.w);
            img.toNormalMap(inputOptions.bumpFrequencyScale.x, inputOptions.bumpFrequencyScale.y, inputOptions.bumpFrequencyScale.z, inputOptions.bumpFrequencyScale.w);
        }

        // Color data is processed in linear space.
        if (!img.isNormalMap())
        {
            img.toLinear(inputOptions.inputGamma);
        }

        // Bring the input to the target extents.
        img.resize(levelWidth, levelHeight, levelDepth, ResizeFilter_Box);

        // The copy receives the output gamma so that img itself stays linear for mipmap generation.
        nvtt::Surface tmp = img;
        if (!img.isNormalMap())
        {
            tmp.toGamma(inputOptions.outputGamma);
        }

        quantize(tmp, compressionOptions);
        compress(tmp, face, 0, compressionOptions, outputOptions);

        for (int level = 1; level < mipmapCount; level++)
        {
            levelWidth = max(1, levelWidth / 2);
            levelHeight = max(1, levelHeight / 2);
            levelDepth = max(1, levelDepth / 2);

            // Source images are stored level-major: all faces of level 0, then all faces of level 1, ...
            const int imageIndex = level * faceCount + face;

            // If one level is missing, ignore the following source images of this face as well.
            if (sourceMipmapsUsable && inputOptions.images[imageIndex] == NULL)
            {
                sourceMipmapsUsable = false;
            }

            if (sourceMipmapsUsable)
            {
                img.setImage(inputOptions.inputFormat, levelWidth, levelHeight, levelDepth, inputOptions.images[imageIndex]);

                // For already generated mipmaps, we need to convert to linear.
                if (!img.isNormalMap())
                {
                    img.toLinear(inputOptions.inputGamma);
                }
            }
            else if (inputOptions.mipmapFilter == MipmapFilter_Kaiser)
            {
                float params[2] = { inputOptions.kaiserStretch, inputOptions.kaiserAlpha };
                img.buildNextMipmap(MipmapFilter_Kaiser, inputOptions.kaiserWidth, params);
            }
            else
            {
                img.buildNextMipmap(inputOptions.mipmapFilter);
            }

            nvDebugCheck(img.width() == levelWidth);
            nvDebugCheck(img.height() == levelHeight);
            nvDebugCheck(img.depth() == levelDepth);

            // Normal maps are optionally renormalized and skip the gamma step; color data gets
            // the output gamma applied on the copy only.
            const bool levelIsNormalMap = img.isNormalMap();
            if (levelIsNormalMap && inputOptions.normalizeMipmaps)
            {
                img.normalizeNormalMap();
            }

            tmp = img;
            if (!levelIsNormalMap)
            {
                tmp.toGamma(inputOptions.outputGamma);
            }

            quantize(tmp, compressionOptions);
            compress(tmp, face, level, compressionOptions, outputOptions);
        }
    }

    return true;
}
/// Compress image using CUDA.
///
/// The image held in m_image is converted to block linear order and processed in batches of at
/// most MAX_BLOCKS blocks. For every batch the color blocks are compressed on the GPU with the
/// DXT1 kernels while the alpha blocks are compressed on the CPU, and each block is written to
/// the output as 8 bytes of alpha followed by 8 bytes of color.
///
/// @param compressionOptions  Supplies the color weights passed to setupCompressKernel().
/// @param outputOptions       Receives the compressed blocks and any Error_CudaError.
///
/// Without HAVE_CUDA the function only reports Error_CudaError.
void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
	nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA

	// Image size in blocks.
	const uint w = (m_image->width() + 3) / 4;
	const uint h = (m_image->height() + 3) / 4;

	const uint blockNum = w * h;
	const uint compressedSize = blockNum * 8;

	// Host copy of the image in block linear order: 16 texels per block.
	uint imageSize = w * h * 16 * sizeof(Color32);
	uint * blockLinearImage = (uint *) malloc(imageSize);

	// Alpha blocks of one batch; never more than MAX_BLOCKS are alive at once.
	AlphaBlockDXT5 * alphaBlocks = (AlphaBlockDXT5 *)malloc(min(compressedSize, MAX_BLOCKS * 8U));

	// Do not touch the buffers if an allocation failed. An empty image is exempt because
	// malloc(0) may legitimately return NULL. The failure is reported as Error_CudaError,
	// the only error code this function uses.
	if (blockNum != 0 && (blockLinearImage == NULL || alphaBlocks == NULL))
	{
		free(alphaBlocks);
		free(blockLinearImage);
		outputOptions.error(Error_CudaError);
		return;
	}

	convertToBlockLinear(m_image, blockLinearImage);

	setupCompressKernel(compressionOptions.colorWeight.ptr());

	uint bn = 0;
	while(bn != blockNum)
	{
		uint count = min(blockNum - bn, MAX_BLOCKS);

		// Upload this batch: 64 bytes (16 texels) per block.
		cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);

		// Launch kernel.
		if (m_alphaMode == AlphaMode_Transparency)
		{
			compressWeightedKernelDXT1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable);
		}
		else
		{
			compressKernelDXT1_Level4(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable);
		}

		// Compress alpha in parallel with the GPU.
		for (uint i = 0; i < count; i++)
		{
			ColorBlock rgba(blockLinearImage + (bn + i) * 16);
			QuickCompress::compressDXT5A(rgba, alphaBlocks + i);
		}

		// Check for errors.
		cudaError_t err = cudaGetLastError();
		if (err != cudaSuccess)
		{
			nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
			outputOptions.error(Error_CudaError);
		}

		// Copy result to host, overwrite swizzled image.
		// Only the start of the buffer is reused (8 bytes per block), which this batch has already consumed.
		cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost);

		// Output result: alpha block first, then the color block (two uints = 8 bytes).
		for (uint i = 0; i < count; i++)
		{
			outputOptions.writeData(alphaBlocks + i, 8);
			outputOptions.writeData(blockLinearImage + i * 2, 8);
		}

		bn += count;
	}

	free(alphaBlocks);
	free(blockLinearImage);

#else
	outputOptions.error(Error_CudaError);
#endif
}