Ejemplo n.º 1
0
    namespace Vmx
    {
        // Byte-index tables for vec_perm over a pair of 16-byte vectors (indices 0x00-0x1F).
        // K8_PERM_U selects the even-numbered bytes of the pair, K8_PERM_V the odd-numbered ones;
        // DeinterleavedUv uses them to produce the vectors stored to its 'u' and 'v' outputs.
        const v128_u8 K8_PERM_U = SIMD_VEC_SETR_EPI8(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E);
        const v128_u8 K8_PERM_V = SIMD_VEC_SETR_EPI8(0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F);

        // Issues two successive loads on the interleaved loader, then writes one vector of
        // even-position bytes to 'u' and one vector of odd-position bytes to 'v'.
        // 'first' is forwarded to the first Load and to both Store calls.
        template<bool align, bool first> 
        SIMD_INLINE void DeinterleavedUv(const Loader<align> & uv, Storer<align> & u, Storer<align> & v)
        {
            // Both loads go through the same loader, so they stay as separate, ordered statements.
            const v128_u8 head = Load<align, first>(uv);
            const v128_u8 tail = Load<align, false>(uv);

            const v128_u8 evenBytes = vec_perm(head, tail, K8_PERM_U);
            const v128_u8 oddBytes = vec_perm(head, tail, K8_PERM_V);

            Store<align, first>(u, evenBytes);
            Store<align, first>(v, oddBytes);
        }

        template <bool align> void DeinterleaveUv(const uint8_t * uv, size_t uvStride, size_t width, size_t height, 
            uint8_t * u, size_t uStride, uint8_t * v, size_t vStride)
        {
            assert(width >= A);
            if(align)
                assert(Aligned(uv) && Aligned(uvStride) && Aligned(u) && Aligned(uStride) && Aligned(v) && Aligned(vStride));

            size_t alignedWidth = AlignLo(width, A);
            for(size_t row = 0; row < height; ++row)
            {
                Loader<align> _uv(uv);
                Storer<align> _u(u), _v(v);
                DeinterleavedUv<align, true>(_uv, _u, _v);
                for(size_t col = A; col < alignedWidth; col += A)
                    DeinterleavedUv<align, false>(_uv, _u, _v);
                Flush(_u, _v);

                if(width != alignedWidth)
                {
                    Loader<false> _uv(uv + 2*(width - A));
                    Storer<false> _u(u + width - A), _v(v + width - A);
                    DeinterleavedUv<false, true>(_uv, _u, _v);
                    Flush(_u, _v);
                }

                uv += uvStride;
                u += uStride;
                v += vStride;
            }
        }

        // Entry point: picks the aligned or unaligned implementation depending on whether
        // every pointer and stride passes the Aligned() check.
        void DeinterleaveUv(const uint8_t * uv, size_t uvStride, size_t width, size_t height, 
            uint8_t * u, size_t uStride, uint8_t * v, size_t vStride)
        {
            const bool allAligned = Aligned(uv) && Aligned(uvStride) && Aligned(u) && Aligned(uStride) && Aligned(v) && Aligned(vStride);
            if(!allAligned)
            {
                DeinterleaveUv<false>(uv, uvStride, width, height, u, uStride, v, vStride);
                return;
            }
            DeinterleaveUv<true>(uv, uvStride, width, height, u, uStride, v, vStride);
        }
    }
Ejemplo n.º 2
0
        // Fills a 3-bytes-per-pixel image with the constant color (blue, green, red).
        //
        // dst/stride  - destination buffer and its row stride in bytes.
        // width/height - image size in pixels.
        //
        // Each row is written in blocks of A pixels (three vectors per block). If width is not
        // a multiple of A, the last A pixels of the row are rewritten by an unaligned pass.
        // Requires width >= A: the first block is always stored in full and the tail offset is
        // computed as (width - A)*3, which would wrap around for narrower images.
        // With align == true, dst and stride must be aligned.
        template <bool align> void FillBgr(uint8_t * dst, size_t stride, size_t width, size_t height, uint8_t blue, uint8_t green, uint8_t red)
        {
            assert(width >= A);
            if(align)
                assert(Aligned(dst) && Aligned(stride));

            size_t alignedWidth = AlignLo(width, A);

            // The three vectors together hold one period of the repeating b,g,r byte pattern:
            // bgr0 starts at blue, bgr1 continues at green, bgr2 continues at red.
            v128_u8 bgr0 = SIMD_VEC_SETR_EPI8(blue, green, red, blue, green, red, blue, green, red, blue, green, red, blue, green, red, blue);
            v128_u8 bgr1 = SIMD_VEC_SETR_EPI8(green, red, blue, green, red, blue, green, red, blue, green, red, blue, green, red, blue, green);
            v128_u8 bgr2 = SIMD_VEC_SETR_EPI8(red, blue, green, red, blue, green, red, blue, green, red, blue, green, red, blue, green, red);

            for(size_t row = 0; row < height; ++row)
            {
                Storer<align> _dst(dst);
                // First block of the row (marked with first == true on its first store).
                Store<align, true>(_dst, bgr0);
                Store<align, false>(_dst, bgr1);
                Store<align, false>(_dst, bgr2);
                for(size_t col = A; col < alignedWidth; col += A)
                {
                    Store<align, false>(_dst, bgr0);
                    Store<align, false>(_dst, bgr1);
                    Store<align, false>(_dst, bgr2);
                }
                Flush(_dst);

                if(alignedWidth != width)
                {
                    // Tail: the block starts on a pixel boundary, so the pattern phase is unchanged.
                    Storer<false> _dst(dst + (width - A)*3);
                    Store<false, true>(_dst, bgr0);
                    Store<false, false>(_dst, bgr1);
                    Store<false, false>(_dst, bgr2);
                    Flush(_dst);
                }

                dst += stride;
            }
        }
Ejemplo n.º 3
0
    namespace Vmx
    {
        // Weights, rounding term and shift used by BgraToGray32:
        // result = (weighted channel sum + K32_ROUND_TERM) >> K32_SHIFT.
        // NOTE(review): SIMD_VEC_SET2_EPI16(a, b) presumably repeats the 16-bit pair (a, b)
        // across the vector - confirm against the macro definition.
        const v128_u16 K16_BLUE_RED = SIMD_VEC_SET2_EPI16(Base::BLUE_TO_GRAY_WEIGHT, Base::RED_TO_GRAY_WEIGHT);        
        const v128_u16 K16_GREEN_0000 = SIMD_VEC_SET2_EPI16(Base::GREEN_TO_GRAY_WEIGHT, 0x0000);
        const v128_u32 K32_ROUND_TERM = SIMD_VEC_SET1_EPI32(Base::BGR_TO_GRAY_ROUND_TERM);
        const v128_u32 K32_SHIFT = SIMD_VEC_SET1_EPI32(Base::BGR_TO_GRAY_AVERAGING_SHIFT);

        // vec_perm index tables that expand packed 3-byte pixels into 4-byte groups:
        // every group takes three consecutive source bytes followed by index 0x00 as a
        // filler for the fourth slot. K8_PERM_0/1 are applied to the (first, second) loaded
        // vectors, K8_PERM_2/3 to the (second, third) ones; each table yields four pixels.
        const v128_u8 K8_PERM_0 = SIMD_VEC_SETR_EPI8(0x00, 0x01, 0x02, 0x00, 0x03, 0x04, 0x05, 0x00, 0x06, 0x07, 0x08, 0x00, 0x09, 0x0A, 0x0B, 0x00);
        const v128_u8 K8_PERM_1 = SIMD_VEC_SETR_EPI8(0x0C, 0x0D, 0x0E, 0x00, 0x0F, 0x10, 0x11, 0x00, 0x12, 0x13, 0x14, 0x00, 0x15, 0x16, 0x17, 0x00);
        const v128_u8 K8_PERM_2 = SIMD_VEC_SETR_EPI8(0x08, 0x09, 0x0A, 0x00, 0x0B, 0x0C, 0x0D, 0x00, 0x0E, 0x0F, 0x10, 0x00, 0x11, 0x12, 0x13, 0x00);
        const v128_u8 K8_PERM_3 = SIMD_VEC_SETR_EPI8(0x14, 0x15, 0x16, 0x00, 0x17, 0x18, 0x19, 0x00, 0x1A, 0x1B, 0x1C, 0x00, 0x1D, 0x1E, 0x1F, 0x00);

        // Turns a vector of 4-byte pixels into one 32-bit value per pixel:
        // (weighted channel sum + K32_ROUND_TERM) >> K32_SHIFT.
        SIMD_INLINE v128_u32 BgraToGray32(v128_u8 bgra)
        {
            // Multiplying by K8_01 widens the even / odd bytes into 16-bit lanes
            // (assumes K8_01 is a vector of byte ones - TODO confirm).
            const v128_u16 evenBytes = vec_mule(bgra, K8_01);
            const v128_u16 oddBytes = vec_mulo(bgra, K8_01);

            // Per-pixel 32-bit products of the channels with their weights.
            const v128_u32 evenEvenTerm = vec_mule(evenBytes, K16_BLUE_RED);
            const v128_u32 evenOddTerm = vec_mulo(evenBytes, K16_BLUE_RED);
            const v128_u32 oddEvenTerm = vec_mule(oddBytes, K16_GREEN_0000);

            const v128_u32 sum = vec_add(oddEvenTerm, vec_add(evenEvenTerm, evenOddTerm));
            const v128_u32 rounded = vec_add(sum, K32_ROUND_TERM);
            return vec_sr(rounded, K32_SHIFT);
        }

        // Loads three vectors of packed 3-byte pixels, converts them in four groups of four
        // pixels via BgraToGray32, and stores a single vector of 8-bit results.
        template<bool align, bool first>
        SIMD_INLINE void BgrToGray(const Loader<align> & bgr, Storer<align> & gray)
        {
            // Loads share one loader and therefore stay in this exact order.
            const v128_u8 src0 = Load<align, first>(bgr);
            const v128_u8 src1 = Load<align, false>(bgr);
            const v128_u8 src2 = Load<align, false>(bgr);

            const v128_u32 quad0 = BgraToGray32(vec_perm(src0, src1, K8_PERM_0));
            const v128_u32 quad1 = BgraToGray32(vec_perm(src0, src1, K8_PERM_1));
            const v128_u32 quad2 = BgraToGray32(vec_perm(src1, src2, K8_PERM_2));
            const v128_u32 quad3 = BgraToGray32(vec_perm(src1, src2, K8_PERM_3));

            // Narrow 32 -> 16 -> 8 bits with unsigned saturation.
            const v128_u16 lower = vec_packsu(quad0, quad1);
            const v128_u16 upper = vec_packsu(quad2, quad3);
            Store<align, first>(gray, vec_packsu(lower, upper));
        }

        template <bool align> void BgrToGray(const uint8_t * bgr, size_t width, size_t height, size_t bgrStride, uint8_t * gray, size_t grayStride)
        {
            assert(width >= A);
            if(align)
                assert(Aligned(bgr) && Aligned(bgrStride) && Aligned(gray) && Aligned(grayStride));

            size_t alignedWidth = AlignLo(width, A);
            for(size_t row = 0; row < height; ++row)
            {
                Loader<align> _bgr(bgr);
                Storer<align> _gray(gray);
                BgrToGray<align, true>(_bgr, _gray);
                for(size_t col = A; col < alignedWidth; col += A)
                    BgrToGray<align, false>(_bgr, _gray);
                Flush(_gray);

                if(alignedWidth != width)
                {
                    Loader<false> _bgr(bgr + 3*(width - A));
                    Storer<false> _gray(gray + width - A);
                    BgrToGray<false, true>(_bgr, _gray);
                    Flush(_gray);
                }

                bgr += bgrStride;
                gray += grayStride;
            }
        }

        // Entry point: uses the aligned implementation only when both pointers and both
        // strides pass the Aligned() check, otherwise the unaligned one.
        void BgrToGray(const uint8_t * bgr, size_t width, size_t height, size_t bgrStride, uint8_t * gray, size_t grayStride)
        {
            const bool allAligned = Aligned(bgr) && Aligned(gray) && Aligned(bgrStride) && Aligned(grayStride);
            if(!allAligned)
            {
                BgrToGray<false>(bgr, width, height, bgrStride, gray, grayStride);
                return;
            }
            BgrToGray<true>(bgr, width, height, bgrStride, gray, grayStride);
        }
    }
Ejemplo n.º 4
0
    namespace Vmx
    {
        // vec_perm index tables used by BgraToBayer on a pair of vectors holding eight
        // 4-byte pixels. Each table produces eight 2-byte lanes: the first byte of a lane is
        // index 0x00, the second byte picks one channel byte of the corresponding pixel
        // (offset 0, 1 or 2 inside the pixel), alternating between the two channels named in
        // the table suffix. The lanes are later narrowed with vec_pack.
        const v128_u8 K8_PERM_GR = SIMD_VEC_SETR_EPI8(0x00, 0x01, 0x00, 0x06, 0x00, 0x09, 0x00, 0x0E, 0x00, 0x11, 0x00, 0x16, 0x00, 0x19, 0x00, 0x1E);
        const v128_u8 K8_PERM_BG = SIMD_VEC_SETR_EPI8(0x00, 0x00, 0x00, 0x05, 0x00, 0x08, 0x00, 0x0D, 0x00, 0x10, 0x00, 0x15, 0x00, 0x18, 0x00, 0x1D);
        const v128_u8 K8_PERM_GB = SIMD_VEC_SETR_EPI8(0x00, 0x01, 0x00, 0x04, 0x00, 0x09, 0x00, 0x0C, 0x00, 0x11, 0x00, 0x14, 0x00, 0x19, 0x00, 0x1C);
        const v128_u8 K8_PERM_RG = SIMD_VEC_SETR_EPI8(0x00, 0x02, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x0D, 0x00, 0x12, 0x00, 0x15, 0x00, 0x1A, 0x00, 0x1D);

        // Converts one block of 4-byte pixels (four loaded vectors) into one vector of
        // Bayer samples for the given format and row parity.
        //
        // bgra  - source loader; four loads are issued on it, in order.
        // perm  - table of vec_perm index vectors, indexed as perm[format][row].
        // bayer - destination storer; one vector is stored.
        template <int format, int row, bool align, bool first> 
        SIMD_INLINE void BgraToBayer(const Loader<align> & bgra, const v128_u8 perm[4][2], Storer<align> & bayer)
        {
            // All loads go through the same loader, so each one is a separate statement.
            // The order in which function arguments are evaluated is unspecified in C++;
            // placing two Load calls inside one vec_perm argument list would allow the
            // compiler to perform them in either order and swap the two source vectors.
            const v128_u8 bgra0 = Load<align, first>(bgra);
            const v128_u8 bgra1 = Load<align, false>(bgra);
            const v128_u8 bgra2 = Load<align, false>(bgra);
            const v128_u8 bgra3 = Load<align, false>(bgra);

            const v128_u16 lo = (v128_u16)vec_perm(bgra0, bgra1, perm[format][row]);
            const v128_u16 hi = (v128_u16)vec_perm(bgra2, bgra3, perm[format][row]);
            // vec_pack narrows the sixteen 16-bit lanes to sixteen bytes.
            Store<align, first>(bayer, vec_pack(lo, hi));
        }

        template <int format, bool align> 
        void BgraToBayer(const uint8_t * bgra, size_t width, size_t height, size_t bgraStride, uint8_t * bayer, size_t bayerStride)
        {
            assert(width >= A);
            if(align)
                assert(Aligned(bgra) && Aligned(bgraStride) && Aligned(bayer) && Aligned(bayerStride));

            size_t alignedWidth = AlignLo(width, A);

            const v128_u8 perm[4][2] = 
            {
                {K8_PERM_GR, K8_PERM_BG}, 
                {K8_PERM_GB, K8_PERM_RG}, 
                {K8_PERM_RG, K8_PERM_GB}, 
                {K8_PERM_BG, K8_PERM_GR}
            };

            for(size_t row = 0; row < height; row += 2)
            {
                Loader<align> _bgra0(bgra);
                Storer<align> _bayer0(bayer);
                BgraToBayer<format, 0, align, true>(_bgra0, perm, _bayer0);
                for(size_t col = A; col < alignedWidth; col += A)
                    BgraToBayer<format, 0, align, false>(_bgra0, perm, _bayer0);
                Flush(_bayer0);

                if(width != alignedWidth)
                {
                    Loader<false> _bgra(bgra + 4*(width - A));
                    Storer<false> _bayer(bayer + width - A);
                    BgraToBayer<format, 0, false, true>(_bgra, perm, _bayer);
                    Flush(_bayer);
                }

                bgra += bgraStride;
                bayer += bayerStride; 

                Loader<align> _bgra1(bgra);
                Storer<align> _bayer1(bayer);
                BgraToBayer<format, 1, align, true>(_bgra1, perm, _bayer1);
                for(size_t col = A; col < alignedWidth; col += A)
                    BgraToBayer<format, 1, align, false>(_bgra1, perm, _bayer1);
                Flush(_bayer1);

                if(width != alignedWidth)
                {
                    Loader<false> _bgra(bgra + 4*(width - A));
                    Storer<false> _bayer(bayer + width - A);
                    BgraToBayer<format, 1, false, true>(_bgra, perm, _bayer);
                    Flush(_bayer);
                }

                bgra += bgraStride;
                bayer += bayerStride; 
            }        
        }

        // Maps the runtime Bayer format to the compile-time 'format' index (0..3) of the
        // row-pair kernel. Width and height must both be even; any other format value
        // trips assert(0).
        template<bool align>
        void BgraToBayer(const uint8_t * bgra, size_t width, size_t height, size_t bgraStride, uint8_t * bayer, size_t bayerStride, SimdPixelFormatType bayerFormat)
        {
            assert((width%2 == 0) && (height%2 == 0));

            if(bayerFormat == SimdPixelFormatBayerGrbg)
                BgraToBayer<0, align>(bgra, width, height, bgraStride, bayer, bayerStride);
            else if(bayerFormat == SimdPixelFormatBayerGbrg)
                BgraToBayer<1, align>(bgra, width, height, bgraStride, bayer, bayerStride);
            else if(bayerFormat == SimdPixelFormatBayerRggb)
                BgraToBayer<2, align>(bgra, width, height, bgraStride, bayer, bayerStride);
            else if(bayerFormat == SimdPixelFormatBayerBggr)
                BgraToBayer<3, align>(bgra, width, height, bgraStride, bayer, bayerStride);
            else
                assert(0);
        }

        // Entry point: selects the aligned or unaligned implementation from the result of
        // the Aligned() checks on both pointers and both strides.
        void BgraToBayer(const uint8_t * bgra, size_t width, size_t height, size_t bgraStride, uint8_t * bayer, size_t bayerStride, SimdPixelFormatType bayerFormat)
        {
            const bool allAligned = Aligned(bgra) && Aligned(bgraStride) && Aligned(bayer) && Aligned(bayerStride);
            if(!allAligned)
            {
                BgraToBayer<false>(bgra, width, height, bgraStride, bayer, bayerStride, bayerFormat);
                return;
            }
            BgraToBayer<true>(bgra, width, height, bgraStride, bayer, bayerStride, bayerFormat);
        }
    }
Ejemplo n.º 5
0
    namespace Vsx
    {
        // vec_perm index tables used by BgrToBgra.
        // K8_PERM_0 / K8_PERM_3 expand twelve bytes of the first operand (bytes 0x00-0x0B
        // and 0x04-0x0F respectively) into four 4-byte groups, taking the fourth byte of
        // every group from the second operand (indices >= 0x10, the alpha vector).
        // K8_PERM_1 / K8_PERM_2 extract a 16-byte window that straddles two adjacent source
        // vectors, starting at byte 0x0C and 0x04 of the pair respectively.
        const v128_u8 K8_PERM_0 = SIMD_VEC_SETR_EPI8(0x00, 0x01, 0x02, 0x13, 0x03, 0x04, 0x05, 0x17, 0x06, 0x07, 0x08, 0x1B, 0x09, 0x0A, 0x0B, 0x1F);
        const v128_u8 K8_PERM_1 = SIMD_VEC_SETR_EPI8(0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B);
        const v128_u8 K8_PERM_2 = SIMD_VEC_SETR_EPI8(0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13);
        const v128_u8 K8_PERM_3 = SIMD_VEC_SETR_EPI8(0x04, 0x05, 0x06, 0x13, 0x07, 0x08, 0x09, 0x17, 0x0A, 0x0B, 0x0C, 0x1B, 0x0D, 0x0E, 0x0F, 0x1F);

        // Loads three vectors of packed 3-byte pixels and stores four vectors of 4-byte
        // pixels, inserting a byte from 'alpha' as the fourth byte of every pixel.
        template <bool align, bool first> SIMD_INLINE void BgrToBgra(const Loader<align> & bgr, const v128_u8 & alpha, Storer<align> & bgra)
        {
            // Loads share one loader and must keep this order.
            const v128_u8 src0 = Load<align, first>(bgr);
            const v128_u8 src1 = Load<align, false>(bgr);
            const v128_u8 src2 = Load<align, false>(bgr);

            // 16-byte windows that straddle neighbouring source vectors.
            const v128_u8 window01 = vec_perm(src0, src1, K8_PERM_1);
            const v128_u8 window12 = vec_perm(src1, src2, K8_PERM_2);

            Store<align, first>(bgra, vec_perm(src0, alpha, K8_PERM_0));
            Store<align, false>(bgra, vec_perm(window01, alpha, K8_PERM_0));
            Store<align, false>(bgra, vec_perm(window12, alpha, K8_PERM_3));
            Store<align, false>(bgra, vec_perm(src2, alpha, K8_PERM_3));
        }

        template <bool align> void BgrToBgra(const uint8_t * bgr, size_t width, size_t height, size_t bgrStride, uint8_t * bgra, size_t bgraStride, uint8_t alpha)
        {
            assert(width >= A);
            if(align)
                assert(Aligned(bgra) && Aligned(bgraStride) && Aligned(bgr) && Aligned(bgrStride));

            size_t alignedWidth = AlignLo(width, A);
            if(width == alignedWidth)
                alignedWidth -= A;

            const v128_u8 _alpha = SetU8(alpha);

            for(size_t row = 0; row < height; ++row)
            {
                Loader<align> _bgr(bgr);
                Storer<align> _bgra(bgra);
                BgrToBgra<align, true>(_bgr, _alpha, _bgra);
                for(size_t col = A; col < alignedWidth; col += A)
                    BgrToBgra<align, false>(_bgr, _alpha, _bgra);
                Flush(_bgra);

                if(width != alignedWidth)
                {
                    Loader<false> _bgr(bgr + 3*(width - A));
                    Storer<false> _bgra(bgra + 4*(width - A));
                    BgrToBgra<false, true>(_bgr, _alpha, _bgra);
                    Flush(_bgra);
                }

                bgra += bgraStride;
                bgr += bgrStride;
            }
        }

        // Entry point: dispatches to the aligned implementation when both pointers and both
        // strides pass the Aligned() check, otherwise to the unaligned one.
        void BgrToBgra(const uint8_t * bgr, size_t width, size_t height, size_t bgrStride, uint8_t * bgra, size_t bgraStride, uint8_t alpha)
        {
            const bool allAligned = Aligned(bgra) && Aligned(bgraStride) && Aligned(bgr) && Aligned(bgrStride);
            if(!allAligned)
            {
                BgrToBgra<false>(bgr, width, height, bgrStride, bgra, bgraStride, alpha);
                return;
            }
            BgrToBgra<true>(bgr, width, height, bgrStride, bgra, bgraStride, alpha);
        }

        // vec_perm index table that interleaves the odd-numbered bytes (0x01, 0x03, ...) of
        // the first operand with the odd-numbered bytes of the second operand (0x11, 0x13, ...),
        // i.e. it keeps one byte out of every 2-byte element of each input.
        const v128_u8 K8_PERM_48 = SIMD_VEC_SETR_EPI8(0x01, 0x11, 0x03, 0x13, 0x05, 0x15, 0x07, 0x17, 0x09, 0x19, 0x0B, 0x1B, 0x0D, 0x1D, 0x0F, 0x1F);

        // Reads one vector from each of the three planes at byte 'offset', reduces every
        // 2-byte element to a single byte via K8_PERM_48, and stores two vectors of
        // interleaved 4-byte pixels with 'alpha' as the fourth channel.
        template <bool align, bool first> 
        SIMD_INLINE void Bgr48pToBgra32(const uint8_t * blue, const uint8_t * green, const uint8_t * red, size_t offset, 
            const v128_u8 & alpha, Storer<align> & bgra)
        {
            const v128_u8 bluePlane = Load<align>(blue + offset);
            const v128_u8 greenPlane = Load<align>(green + offset);
            const v128_u8 redPlane = Load<align>(red + offset);

            // Byte pairs (blue, green) and (red, alpha), viewed as 16-bit lanes.
            const v128_u16 blueGreen = (v128_u16)vec_perm(bluePlane, greenPlane, K8_PERM_48);
            const v128_u16 redAlpha = (v128_u16)vec_perm(redPlane, alpha, K8_PERM_48);

            // The two stores go through the same storer and keep their order.
            Store<align, first>(bgra, (v128_u8)UnpackLoU16(redAlpha, blueGreen));
            Store<align, false>(bgra, (v128_u8)UnpackHiU16(redAlpha, blueGreen));
        }

        template <bool align> void Bgr48pToBgra32(const uint8_t * blue, size_t blueStride, size_t width, size_t height,
            const uint8_t * green, size_t greenStride, const uint8_t * red, size_t redStride, uint8_t * bgra, size_t bgraStride, uint8_t alpha)
        {
            assert(width >= HA);
            if(align)
            {
                assert(Aligned(blue) && Aligned(blueStride));
                assert(Aligned(green) && Aligned(greenStride));
                assert(Aligned(red) && Aligned(redStride));
                assert(Aligned(bgra) && Aligned(bgraStride));
            }

            v128_u8 _alpha = SetU8(alpha);
            size_t alignedWidth = AlignLo(width, HA);
            for(size_t row = 0; row < height; ++row)
            {
                Storer<align> _bgra(bgra);
                Bgr48pToBgra32<align, true>(blue, green, red, 0, _alpha, _bgra);
                for(size_t col = HA; col < alignedWidth; col += HA)
                    Bgr48pToBgra32<align, false>(blue, green, red, col*2, _alpha, _bgra);
                Flush(_bgra);

                if(width != alignedWidth)
                {
                    Storer<false> _bgra(bgra + (width - HA)*4);
                    Bgr48pToBgra32<false, true>(blue, green, red, (width - HA)*2, _alpha, _bgra);
                    Flush(_bgra);
                }

                blue += blueStride;
                green += greenStride;
                red += redStride;
                bgra += bgraStride;
            }
        }

        // Entry point: uses the aligned implementation only when every plane pointer, the
        // destination pointer and all strides pass the Aligned() check.
        void Bgr48pToBgra32(const uint8_t * blue, size_t blueStride, size_t width, size_t height,
            const uint8_t * green, size_t greenStride, const uint8_t * red, size_t redStride, uint8_t * bgra, size_t bgraStride, uint8_t alpha)
        {
            const bool allAligned = Aligned(blue) && Aligned(blueStride) && Aligned(green) && Aligned(greenStride) && 
                Aligned(red) && Aligned(redStride) && Aligned(bgra) && Aligned(bgraStride);
            if(!allAligned)
            {
                Bgr48pToBgra32<false>(blue, blueStride, width, height, green, greenStride, red, redStride, bgra, bgraStride, alpha);
                return;
            }
            Bgr48pToBgra32<true>(blue, blueStride, width, height, green, greenStride, red, redStride, bgra, bgraStride, alpha);
        }
    }