Exemple #1
0
        template <bool align> void Bgr48pToBgra32(const uint8_t * blue, size_t blueStride, size_t width, size_t height,
            const uint8_t * green, size_t greenStride, const uint8_t * red, size_t redStride, uint8_t * bgra, size_t bgraStride, uint8_t alpha)
        {
            assert(width >= HA);
            if(align)
            {
                assert(Aligned(blue) && Aligned(blueStride));
                assert(Aligned(green) && Aligned(greenStride));
                assert(Aligned(red) && Aligned(redStride));
                assert(Aligned(bgra) && Aligned(bgraStride));
            }

            v128_u8 _alpha = SetU8(alpha);
            size_t alignedWidth = AlignLo(width, HA);
            for(size_t row = 0; row < height; ++row)
            {
                Storer<align> _bgra(bgra);
                Bgr48pToBgra32<align, true>(blue, green, red, 0, _alpha, _bgra);
                for(size_t col = HA; col < alignedWidth; col += HA)
                    Bgr48pToBgra32<align, false>(blue, green, red, col*2, _alpha, _bgra);
                Flush(_bgra);

                if(width != alignedWidth)
                {
                    Storer<false> _bgra(bgra + (width - HA)*4);
                    Bgr48pToBgra32<false, true>(blue, green, red, (width - HA)*2, _alpha, _bgra);
                    Flush(_bgra);
                }

                blue += blueStride;
                green += greenStride;
                red += redStride;
                bgra += bgraStride;
            }
        }
        template <bool align, bool increment> void InterferenceChangeMasked(int16_t * statistic, size_t statisticStride, size_t width, size_t height, 
            uint8_t value, int16_t saturation, const uint8_t * mask, size_t maskStride, uint8_t index)
        {
            assert(width >= A);
            if(align)
                assert(Aligned(statistic) && Aligned(statisticStride, HA) && Aligned(mask) && Aligned(maskStride));

            size_t alignedWidth = Simd::AlignLo(width, A);
            v128_u8 tailMask = ShiftLeft(K8_FF, A - width + alignedWidth);

            v128_s16 _value = SetI16(value);
            v128_s16 _saturation = SetI16(saturation);
            v128_u8 _index = SetU8(index);
            for(size_t row = 0; row < height; ++row)
            {
                Loader<align> statisticSrc(statistic), maskSrc(mask);
                Storer<align> statisticDst(statistic);
                InterferenceChangeMasked<align, true, increment>(statisticSrc, _value, _saturation, maskSrc, _index, K8_FF, statisticDst);
                for(size_t col = A; col < alignedWidth; col += A)
                    InterferenceChangeMasked<align, false, increment>(statisticSrc, _value, _saturation, maskSrc, _index, K8_FF, statisticDst);
                Flush(statisticDst);
                if(alignedWidth != width)
                {
                    Loader<false> statisticSrc(statistic + width - A), maskSrc(mask + width - A);
                    Storer<false> statisticDst(statistic + width - A);
                    InterferenceChangeMasked<false, true, increment>(statisticSrc, _value, _saturation, maskSrc, _index, tailMask, statisticDst);
                    Flush(statisticDst);
                }
                statistic += statisticStride;
                mask += maskStride;
            }
        }
Exemple #3
0
        template <bool align> void BgrToBgra(const uint8_t * bgr, size_t width, size_t height, size_t bgrStride, uint8_t * bgra, size_t bgraStride, uint8_t alpha)
        {
            assert(width >= A);
            if(align)
                assert(Aligned(bgra) && Aligned(bgraStride) && Aligned(bgr) && Aligned(bgrStride));

            size_t alignedWidth = AlignLo(width, A);
            if(width == alignedWidth)
                alignedWidth -= A;

            const v128_u8 _alpha = SetU8(alpha);

            for(size_t row = 0; row < height; ++row)
            {
                Loader<align> _bgr(bgr);
                Storer<align> _bgra(bgra);
                BgrToBgra<align, true>(_bgr, _alpha, _bgra);
                for(size_t col = A; col < alignedWidth; col += A)
                    BgrToBgra<align, false>(_bgr, _alpha, _bgra);
                Flush(_bgra);

                if(width != alignedWidth)
                {
                    Loader<false> _bgr(bgr + 3*(width - A));
                    Storer<false> _bgra(bgra + 4*(width - A));
                    BgrToBgra<false, true>(_bgr, _alpha, _bgra);
                    Flush(_bgra);
                }

                bgra += bgraStride;
                bgr += bgrStride;
            }
        }
        // Row-by-row driver for the ConditionalFill<compareType, flag> kernel (defined elsewhere).
        // The kernel is given the source row, a column offset, the broadcast threshold and
        // value, and the destination row.
        //
        // NOTE(review): `align` and `compareType` are used below but not declared in the
        // visible text - presumably template parameters whose `template <...>` line
        // is missing here. Confirm against the original file.
        //
        // src / srcStride : source image, advanced by srcStride per row.
        // dst / dstStride : destination image, advanced by dstStride per row.
        // threshold, value: broadcast once with SetU8 and passed to every kernel call.
        //
        // Preconditions (asserted): width >= A; alignment of all pointers/strides when align is true.
        void ConditionalFill(const uint8_t * src, size_t srcStride, size_t width, size_t height,
            uint8_t threshold, uint8_t value, uint8_t * dst, size_t dstStride)
        {
            assert(width >= A);
            if (align)
                assert(Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride));

            size_t alignedWidth = Simd::AlignLo(width, A);

            v128_u8 _value = SetU8(value);
            v128_u8 _threshold = SetU8(threshold);
            for (size_t row = 0; row < height; ++row)
            {
                // First chunk (offset 0) uses the caller's align flag.
                ConditionalFill<compareType, align>(src, 0, _threshold, _value, dst);
                // Remaining whole chunks always pass `true` as the second template argument.
                // NOTE(review): combined with the extra `!align` pass below this looks
                // deliberate, but the reason depends on the kernel - confirm.
                for (size_t col = A; col < alignedWidth; col += A)
                    ConditionalFill<compareType, true>(src, col, _threshold, _value, dst);
                // For unaligned input the chunk at (alignedWidth - A) is processed again
                // with the `false` kernel variant.
                if (!align)
                    ConditionalFill<compareType, false>(src, alignedWidth - A, _threshold, _value, dst);
                // Width not a multiple of A: cover the row end with a chunk at (width - A).
                if (alignedWidth != width)
                    ConditionalFill<compareType, false>(src, width - A, _threshold, _value, dst);
                src += srcStride;
                dst += dstStride;
            }
        }
        // Accumulates into *sum the per-row results of AddSquareDifference /
        // ConditionalSquareGradientSum<align, compareType> kernels (defined elsewhere),
        // restricted by Compare8u<compareType>(mask, value).
        //
        // NOTE(review): `align` and `compareType` are used below but not declared in the
        // visible text - presumably template parameters whose `template <...>` line
        // is missing here. Confirm against the original file.
        //
        // The first and last rows are skipped (src/mask advance one row, height -= 2).
        // Each remaining row is split into three parts:
        //   nose : one unaligned vector starting at column 1, masked with noseMask;
        //   body : columns A .. bodyWidth, two vectors (DA) per step while possible, then one;
        //   tail : one unaligned vector at column (width - A - 1), masked with tailMask,
        //          only when bodyWidth != width - 1.
        // NOTE(review): noseMask / tailMask presumably blank the lanes that overlap the
        // body part - confirm against ShiftRight / ShiftLeft.
        //
        // *sum is reset to 0 before the first row.
        // Preconditions (asserted): width >= A + 2, height >= 3; alignment when align is true.
        void ConditionalSquareGradientSum(const uint8_t * src, size_t srcStride, size_t width, size_t height,
            const uint8_t * mask, size_t maskStride, uint8_t value, uint64_t * sum)
        {
            assert(width >= A + 2 && height >= 3);
            if (align)
                assert(Aligned(src) && Aligned(srcStride) && Aligned(mask) && Aligned(maskStride));

            // Start on the second row and stop before the last one.
            src += srcStride;
            mask += maskStride;
            height -= 2;

            size_t bodyWidth = Simd::AlignLo(width - 1, A);
            v128_u8 noseMask = ShiftRight(K8_FF, 1);
            v128_u8 tailMask = ShiftLeft(K8_FF, A - width + 1 + bodyWidth);
            // Upper bound for the two-vectors-per-step loop, which starts at column A.
            size_t alignedWidth = Simd::AlignLo(bodyWidth - A, DA);

            v128_u8 _value = SetU8(value);
            *sum = 0;
            for (size_t row = 0; row < height; ++row)
            {
                // Four 32-bit accumulators, reset per row and folded into *sum at the row end.
                v128_u32 sums[4] = { K32_00000000, K32_00000000, K32_00000000, K32_00000000 };
                {
                    // Nose: the same masked vector is fed twice, with step 1 and step srcStride.
                    const v128_u8 _mask = vec_and(Compare8u<compareType>(Load<false>(mask + 1), _value), noseMask);
                    AddSquareDifference<false>(src + 1, 1, _mask, sums[0]);
                    AddSquareDifference<false>(src + 1, srcStride, _mask, sums[1]);
                }
                size_t col = A;
                // Body, unrolled by two vectors; the second call writes to sums[2] and sums[3].
                for (; col < alignedWidth; col += DA)
                {
                    ConditionalSquareGradientSum<align, compareType>(src, srcStride, mask, col, _value, sums);
                    ConditionalSquareGradientSum<align, compareType>(src, srcStride, mask, col + A, _value, sums + 2);
                }
                // Body, remaining single vectors.
                for (; col < bodyWidth; col += A)
                    ConditionalSquareGradientSum<align, compareType>(src, srcStride, mask, col, _value, sums);
                if (bodyWidth != width - 1)
                {
                    // Tail: last A columns before the final one, restricted by tailMask.
                    size_t offset = width - A - 1;
                    const v128_u8 _mask = vec_and(Compare8u<compareType>(Load<false>(mask + offset), _value), tailMask);
                    AddSquareDifference<false>(src + offset, 1, _mask, sums[0]);
                    AddSquareDifference<false>(src + offset, srcStride, _mask, sums[1]);
                }
                sums[0] = vec_add(vec_add(sums[0], sums[1]), vec_add(sums[2], sums[3]));
                *sum += ExtractSum(sums[0]);
                src += srcStride;
                mask += maskStride;
            }
        }
        template <bool align> void AbsDifferenceSums3x3Masked(const uint8_t *current, size_t currentStride, const uint8_t *background, size_t backgroundStride,
            const uint8_t *mask, size_t maskStride, uint8_t index, size_t width, size_t height, uint64_t * sums)
        {
            assert(height > 2 && width >= A + 2);
            if (align)
                assert(Aligned(background) && Aligned(backgroundStride));

            width -= 2;
            height -= 2;
            current += 1 + currentStride;
            background += 1 + backgroundStride;
            mask += 1 + maskStride;

            size_t bodyWidth = AlignLo(width, A);
            v128_u8 tailMask = ShiftLeft(K8_FF, A - width + bodyWidth);
            v128_u8 _index = SetU8(index);

            for (size_t i = 0; i < 9; ++i)
                sums[i] = 0;

            for (size_t row = 0; row < height; ++row)
            {
                v128_u32 _sums[9];
                for (size_t i = 0; i < 9; ++i)
                    _sums[i] = K32_00000000;

                for (size_t col = 0; col < bodyWidth; col += A)
                {
                    const v128_u8 _mask = LoadMaskU8<false>(mask + col, _index);
                    const v128_u8 _current = vec_and(Load<false>(current + col), _mask);
                    AbsDifferenceSums3x3Masked<align>(_current, background + col, backgroundStride, _mask, _sums);
                }
                if (width - bodyWidth)
                {
                    const v128_u8 _mask = vec_and(LoadMaskU8<false>(mask + width - A, _index), tailMask);
                    const v128_u8 _current = vec_and(Load<false>(current + width - A), _mask);
                    AbsDifferenceSums3x3Masked<false>(_current, background + width - A, backgroundStride, _mask, _sums);
                }

                for (size_t i = 0; i < 9; ++i)
                    sums[i] += ExtractSum(_sums[i]);

                current += currentStride;
                background += backgroundStride;
                mask += maskStride;
            }
        }
        template <bool align> void AbsDifferenceSumMasked(
            const uint8_t *a, size_t aStride, const uint8_t *b, size_t bStride,
            const uint8_t *mask, size_t maskStride, uint8_t index, size_t width, size_t height, uint64_t * sum)
        {
            assert(width >= A);
            if (align)
            {
                assert(Aligned(a) && Aligned(aStride) && Aligned(b) && Aligned(bStride));
                assert(Aligned(mask) && Aligned(maskStride));
            }

            size_t alignedWidth = AlignLo(width, QA);
            size_t bodyWidth = AlignLo(width, A);
            v128_u8 tailMask = ShiftLeft(K8_FF, A - width + bodyWidth);
            v128_u8 _index = SetU8(index);
            *sum = 0;
            for (size_t row = 0; row < height; ++row)
            {
                size_t col = 0;
                v128_u32 sums[4] = { K32_00000000, K32_00000000, K32_00000000, K32_00000000 };
                for (; col < alignedWidth; col += QA)
                {
                    AbsDifferenceSumMasked<align>(a, b, mask, col, _index, sums[0]);
                    AbsDifferenceSumMasked<align>(a, b, mask, col + A, _index, sums[1]);
                    AbsDifferenceSumMasked<align>(a, b, mask, col + 2 * A, _index, sums[2]);
                    AbsDifferenceSumMasked<align>(a, b, mask, col + 3 * A, _index, sums[3]);
                }
                sums[0] = vec_add(vec_add(sums[0], sums[1]), vec_add(sums[2], sums[3]));
                for (; col < bodyWidth; col += A)
                    AbsDifferenceSumMasked<align>(a, b, mask, col, _index, sums[0]);
                if (width - bodyWidth)
                {
                    const v128_u8 _mask = vec_and(tailMask, LoadMaskU8<false>(mask + width - A, _index));
                    AbsDifferenceSumMasked<false>(a, b, width - A, _mask, sums[0]);
                }
                *sum += ExtractSum(sums[0]);
                a += aStride;
                b += bStride;
                mask += maskStride;
            }
        }
        // Writes to *sum the sum of squared src values over all pixels selected by
        // Compare8u<compareType>(mask, value).
        //
        // NOTE(review): `align` and `compareType` are used below but not declared in the
        // visible text - presumably template parameters whose `template <...>` line
        // is missing here. Confirm against the original file.
        //
        // Each row is processed in three stages:
        //   1. four vectors per step (QA columns), each into its own accumulator;
        //   2. one vector per step (A columns) up to bodyWidth, the A-aligned width;
        //   3. if width is not a multiple of A, one unaligned vector at (width - A),
        //      restricted by tailMask, covers the columns bodyWidth .. width - 1.
        //
        // The tail mask and the tail condition are both derived from bodyWidth (the
        // A-aligned width), not from the QA-aligned width: stage 2 already consumes every
        // whole vector up to bodyWidth, so only (width - bodyWidth) trailing columns remain.
        // Using the QA-aligned width here made the shift amount wrap around and could
        // add already-summed columns a second time.
        //
        // *sum is reset to 0 before the first row; the 32-bit row accumulator is folded
        // into the 64-bit total after every row.
        //
        // Preconditions (asserted): width >= A; alignment of all pointers/strides when align is true.
        void ConditionalSquareSum(const uint8_t * src, size_t srcStride, size_t width, size_t height,
            const uint8_t * mask, size_t maskStride, uint8_t value, uint64_t * sum)
        {
            assert(width >= A);
            if (align)
                assert(Aligned(src) && Aligned(srcStride) && Aligned(mask) && Aligned(maskStride));

            size_t alignedWidth = AlignLo(width, QA);
            size_t bodyWidth = AlignLo(width, A);
            // Shift amount is the number of lanes of the tail vector that overlap the body:
            // A - (width - bodyWidth), always in the range 1 .. A - 1 when a tail exists.
            v128_u8 tailMask = ShiftLeft(K8_FF, A - width + bodyWidth);
            v128_u8 _value = SetU8(value);
            *sum = 0;
            for (size_t row = 0; row < height; ++row)
            {
                size_t col = 0;
                v128_u32 sums[4] = { K32_00000000, K32_00000000, K32_00000000, K32_00000000 };
                // Stage 1: unrolled by four vectors.
                for (; col < alignedWidth; col += QA)
                {
                    ConditionalSquareSum<align, compareType>(src, mask, col, _value, sums[0]);
                    ConditionalSquareSum<align, compareType>(src, mask, col + A, _value, sums[1]);
                    ConditionalSquareSum<align, compareType>(src, mask, col + 2 * A, _value, sums[2]);
                    ConditionalSquareSum<align, compareType>(src, mask, col + 3 * A, _value, sums[3]);
                }
                sums[0] = vec_add(vec_add(sums[0], sums[1]), vec_add(sums[2], sums[3]));
                // Stage 2: leftover whole vectors.
                for (; col < bodyWidth; col += A)
                    ConditionalSquareSum<align, compareType>(src, mask, col, _value, sums[0]);
                // Stage 3: masked tail, only when columns remain past bodyWidth.
                if (bodyWidth != width)
                {
                    const v128_u8 _mask = Compare8u<compareType>(Load<false>(mask + width - A), _value);
                    const v128_u8 _src = vec_and(vec_and(Load<false>(src + width - A), _mask), tailMask);
                    sums[0] = vec_msum(_src, _src, sums[0]);
                }
                *sum += ExtractSum(sums[0]);
                src += srcStride;
                mask += maskStride;
            }
        }
	// Indexed overload: builds the element key "<name>[<index>]" with
	// Zion::StringFormat and forwards the value to the (name, value) overload,
	// returning that overload's result.
	// NOTE(review): the format uses %d while index is declared as _U32 - check whether
	// %u is intended and whether lookups build the key the same way before changing it.
	bool CObject::SetU8(const char* name, _U32 index, _U8 val)
	{
		const auto& elementKey = Zion::StringFormat("%s[%d]", name, index);
		return SetU8(elementKey.c_str(), val);
	}