예제 #1
0
		// Entry point for Yuv444pToHue: the aligned kernel is used only when every plane
		// pointer and every stride passes Aligned(); otherwise the unaligned kernel runs.
		void Yuv444pToHue(const uint8_t * y, size_t yStride, const uint8_t * u, size_t uStride, const uint8_t * v, size_t vStride,
			size_t width, size_t height, uint8_t * hue, size_t hueStride)
		{
			const bool allAligned =
				Aligned(y) && Aligned(yStride) &&
				Aligned(u) && Aligned(uStride) &&
				Aligned(v) && Aligned(vStride) &&
				Aligned(hue) && Aligned(hueStride);

			if(allAligned)
			{
				Yuv444pToHue<true>(y, yStride, u, uStride, v, vStride, width, height, hue, hueStride);
			}
			else
			{
				Yuv444pToHue<false>(y, yStride, u, uStride, v, vStride, width, height, hue, hueStride);
			}
		}
 // Alignment dispatcher: falls back to the unaligned kernel as soon as any of the
 // value/background/mask pointers or strides fails the Aligned() check.
 void EdgeBackgroundShiftRangeMasked(const uint8_t * value, size_t valueStride, size_t width, size_t height,
      uint8_t * background, size_t backgroundStride, const uint8_t * mask, size_t maskStride)
 {
     if(!(Aligned(value) && Aligned(valueStride) && Aligned(background) && Aligned(backgroundStride) && Aligned(mask) && Aligned(maskStride)))
     {
         EdgeBackgroundShiftRangeMasked<false>(value, valueStride, width, height, background, backgroundStride, mask, maskStride);
         return;
     }
     EdgeBackgroundShiftRangeMasked<true>(value, valueStride, width, height, background, backgroundStride, mask, maskStride);
 }
예제 #3
0
 // Chooses between the aligned and unaligned DeinterleaveUv kernels based on the
 // alignment of the interleaved source and both destination planes.
 void DeinterleaveUv(const uint8_t * uv, size_t uvStride, size_t width, size_t height,
      uint8_t * u, size_t uStride, uint8_t * v, size_t vStride)
 {
     const bool allAligned =
         Aligned(uv) && Aligned(uvStride) &&
         Aligned(u) && Aligned(uStride) &&
         Aligned(v) && Aligned(vStride);

     if(allAligned)
     {
         DeinterleaveUv<true>(uv, uvStride, width, height, u, uStride, v, vStride);
     }
     else
     {
         DeinterleaveUv<false>(uv, uvStride, width, height, u, uStride, v, vStride);
     }
 }
예제 #4
0
 // Alignment dispatcher for ReduceGray3x3: only the source and destination pointers
 // and strides take part in the check; all arguments are forwarded unchanged.
 void ReduceGray3x3(const uint8_t *src, size_t srcWidth, size_t srcHeight, size_t srcStride,
     uint8_t *dst, size_t dstWidth, size_t dstHeight, size_t dstStride, int compensation)
 {
     if(!(Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride)))
     {
         ReduceGray3x3<false>(src, srcWidth, srcHeight, srcStride, dst, dstWidth, dstHeight, dstStride, compensation);
         return;
     }
     ReduceGray3x3<true>(src, srcWidth, srcHeight, srcStride, dst, dstWidth, dstHeight, dstStride, compensation);
 }
예제 #5
0
 // Forwards to the aligned kernel when src, dx and dy (pointers and strides) all pass
 // Aligned(), and to the unaligned kernel otherwise.
 void TextureBoostedSaturatedGradient(const uint8_t * src, size_t srcStride, size_t width, size_t height,
     uint8_t saturation, uint8_t boost, uint8_t * dx, size_t dxStride, uint8_t * dy, size_t dyStride)
 {
     const bool allAligned =
         Aligned(src) && Aligned(srcStride) &&
         Aligned(dx) && Aligned(dxStride) &&
         Aligned(dy) && Aligned(dyStride);

     if(allAligned)
     {
         TextureBoostedSaturatedGradient<true>(src, srcStride, width, height, saturation, boost, dx, dxStride, dy, dyStride);
     }
     else
     {
         TextureBoostedSaturatedGradient<false>(src, srcStride, width, height, saturation, boost, dx, dxStride, dy, dyStride);
     }
 }
 // Alignment dispatcher: the unaligned kernel handles every case in which the value
 // or background pointer/stride is not aligned.
 void EdgeBackgroundGrowRangeSlow(const uint8_t * value, size_t valueStride, size_t width, size_t height,
      uint8_t * background, size_t backgroundStride)
 {
     if(!(Aligned(value) && Aligned(valueStride) && Aligned(background) && Aligned(backgroundStride)))
     {
         EdgeBackgroundGrowRangeSlow<false>(value, valueStride, width, height, background, backgroundStride);
         return;
     }
     EdgeBackgroundGrowRangeSlow<true>(value, valueStride, width, height, background, backgroundStride);
 }
 // Picks the aligned AbsDifferenceSumMasked kernel only when both images and the mask
 // (pointers and strides) pass Aligned(); the result is written through `sum` by the kernel.
 void AbsDifferenceSumMasked(const uint8_t *a, size_t aStride, const uint8_t *b, size_t bStride,
     const uint8_t *mask, size_t maskStride, uint8_t index, size_t width, size_t height, uint64_t * sum)
 {
     const bool allAligned =
         Aligned(a) && Aligned(aStride) &&
         Aligned(b) && Aligned(bStride) &&
         Aligned(mask) && Aligned(maskStride);

     if (allAligned)
     {
         AbsDifferenceSumMasked<true>(a, aStride, b, bStride, mask, maskStride, index, width, height, sum);
     }
     else
     {
         AbsDifferenceSumMasked<false>(a, aStride, b, bStride, mask, maskStride, index, width, height, sum);
     }
 }
 // Alignment dispatcher: any unaligned pointer or stride among value, backgroundValue
 // and backgroundCount routes the call to the unaligned kernel.
 void EdgeBackgroundIncrementCount(const uint8_t * value, size_t valueStride, size_t width, size_t height,
     const uint8_t * backgroundValue, size_t backgroundValueStride, uint8_t * backgroundCount, size_t backgroundCountStride)
 {
     if (!(Aligned(value) && Aligned(valueStride) && Aligned(backgroundValue) && Aligned(backgroundValueStride) && Aligned(backgroundCount) && Aligned(backgroundCountStride)))
     {
         EdgeBackgroundIncrementCount<false>(value, valueStride, width, height, backgroundValue, backgroundValueStride, backgroundCount, backgroundCountStride);
         return;
     }
     EdgeBackgroundIncrementCount<true>(value, valueStride, width, height, backgroundValue, backgroundValueStride, backgroundCount, backgroundCountStride);
 }
예제 #9
0
 // Alignment dispatcher for GaussianBlur3x3. Besides the pointers and strides, the row
 // length in bytes (channelCount*width) must itself pass Aligned() for the aligned kernel.
 void GaussianBlur3x3(const uint8_t * src, size_t srcStride, size_t width, size_t height,
     size_t channelCount, uint8_t * dst, size_t dstStride)
 {
     const size_t rowSize = channelCount*width;
     const bool allAligned =
         Aligned(src) && Aligned(srcStride) && Aligned(rowSize) &&
         Aligned(dst) && Aligned(dstStride);

     if(allAligned)
     {
         GaussianBlur3x3<true>(src, srcStride, width, height, channelCount, dst, dstStride);
     }
     else
     {
         GaussianBlur3x3<false>(src, srcStride, width, height, channelCount, dst, dstStride);
     }
 }
예제 #10
0
 // Alignment dispatcher for ConditionalSquareSum; forwards `compareType` to the kernel.
 // NOTE(review): `compareType` is not declared in this snippet - presumably a template
 // parameter from a header line outside this view; confirm.
 void ConditionalSquareSum(const uint8_t * src, size_t srcStride, size_t width, size_t height,
     const uint8_t * mask, size_t maskStride, uint8_t value, uint64_t * sum)
 {
     if (!(Aligned(src) && Aligned(srcStride) && Aligned(mask) && Aligned(maskStride)))
     {
         ConditionalSquareSum<false, compareType>(src, srcStride, width, height, mask, maskStride, value, sum);
         return;
     }
     ConditionalSquareSum<true, compareType>(src, srcStride, width, height, mask, maskStride, value, sum);
 }
예제 #11
0
 // Chooses the aligned or unaligned StretchGray2x2 kernel from the alignment of the
 // source and destination pointers and strides.
 void StretchGray2x2(const uint8_t *src, size_t srcWidth, size_t srcHeight, size_t srcStride,
     uint8_t *dst, size_t dstWidth, size_t dstHeight, size_t dstStride)
 {
     const bool allAligned =
         Aligned(src) && Aligned(srcStride) &&
         Aligned(dst) && Aligned(dstStride);

     if(allAligned)
     {
         StretchGray2x2<true>(src, srcWidth, srcHeight, srcStride, dst, dstWidth, dstHeight, dstStride);
     }
     else
     {
         StretchGray2x2<false>(src, srcWidth, srcHeight, srcStride, dst, dstWidth, dstHeight, dstStride);
     }
 }
 // Alignment dispatcher for AbsDifferenceSum: the unaligned kernel is used unless both
 // images (pointers and strides) pass Aligned(). The kernel reports through `sum`.
 void AbsDifferenceSum(const uint8_t *a, size_t aStride, const uint8_t *b, size_t bStride,
     size_t width, size_t height, uint64_t * sum)
 {
     if (!(Aligned(a) && Aligned(aStride) && Aligned(b) && Aligned(bStride)))
     {
         AbsDifferenceSum<false>(a, aStride, b, bStride, width, height, sum);
         return;
     }
     AbsDifferenceSum<true>(a, aStride, b, bStride, width, height, sum);
 }
 // Alignment dispatcher for AbsDifferenceSums3x3Masked.
 //
 // The aligned kernel is selected only when background AND mask (pointers and strides)
 // pass Aligned(); the alignment of `current` is not considered here. Any other input
 // goes to the unaligned kernel, which is the general-purpose path.
 //
 // NOTE(review): mask/maskStride are included in the check on the assumption that the
 // aligned kernel reads the mask with aligned loads, as the AbsDifferenceSumMasked
 // dispatcher does for its mask - confirm against the kernel.
 void AbsDifferenceSums3x3Masked(const uint8_t *current, size_t currentStride, const uint8_t *background, size_t backgroundStride,
     const uint8_t *mask, size_t maskStride, uint8_t index, size_t width, size_t height, uint64_t * sums)
 {
     if (Aligned(background) && Aligned(backgroundStride) && Aligned(mask) && Aligned(maskStride))
         AbsDifferenceSums3x3Masked<true>(current, currentStride, background, backgroundStride, mask, maskStride, index, width, height, sums);
     else
         AbsDifferenceSums3x3Masked<false>(current, currentStride, background, backgroundStride, mask, maskStride, index, width, height, sums);
 }
 // Alignment dispatcher for AbsDifferenceSums3x3. Only the background pointer and
 // stride are checked; `current` does not take part in the decision.
 void AbsDifferenceSums3x3(const uint8_t * current, size_t currentStride, const uint8_t * background, size_t backgroundStride,
     size_t width, size_t height, uint64_t * sums)
 {
     const bool backgroundAligned = Aligned(background) && Aligned(backgroundStride);

     if (backgroundAligned)
     {
         AbsDifferenceSums3x3<true>(current, currentStride, background, backgroundStride, width, height, sums);
     }
     else
     {
         AbsDifferenceSums3x3<false>(current, currentStride, background, backgroundStride, width, height, sums);
     }
 }
예제 #15
0
        // Byte-oriented entry point for Laplace. The destination is handed to the kernel as
        // int16_t elements, so dstStride (in bytes) must be a multiple of sizeof(int16_t)
        // and is converted to an element stride before the call. The kernel's second
        // template argument is fixed to false here.
        void Laplace(const uint8_t * src, size_t srcStride, size_t width, size_t height, uint8_t * dst, size_t dstStride)
        {
            assert(dstStride%sizeof(int16_t) == 0);

            int16_t * const out = reinterpret_cast<int16_t *>(dst);
            const size_t outStride = dstStride/sizeof(int16_t);

            if(!(Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride)))
            {
                Laplace<false, false>(src, srcStride, width, height, out, outStride);
                return;
            }
            Laplace<true, false>(src, srcStride, width, height, out, outStride);
        }
예제 #16
0
        // Byte-oriented entry point that runs InterferenceChange on the statistic buffer
        // viewed as int16_t elements (stride is halved from bytes to elements). The
        // kernel's second template argument is fixed to false here.
        void InterferenceDecrement(uint8_t * statistic, size_t stride, size_t width, size_t height, uint8_t decrement, int16_t saturation)
        {
            assert(Aligned(stride, 2));

            int16_t * const values = reinterpret_cast<int16_t *>(statistic);
            const size_t valueStride = stride/2;

            const bool allAligned = Aligned(statistic) && Aligned(stride);
            if(allAligned)
            {
                InterferenceChange<true, false>(values, valueStride, width, height, decrement, saturation);
            }
            else
            {
                InterferenceChange<false, false>(values, valueStride, width, height, decrement, saturation);
            }
        }
예제 #17
0
		// Alignment dispatcher: the unaligned kernel is taken unless both the
		// backgroundValue and backgroundCount pointers and strides pass Aligned().
		void EdgeBackgroundAdjustRange(uint8_t * backgroundCount, size_t backgroundCountStride, size_t width, size_t height,
			uint8_t * backgroundValue, size_t backgroundValueStride, uint8_t threshold)
		{
			if (!(Aligned(backgroundValue) && Aligned(backgroundValueStride) &&
				Aligned(backgroundCount) && Aligned(backgroundCountStride)))
			{
				EdgeBackgroundAdjustRange<false>(backgroundCount, backgroundCountStride, width, height, backgroundValue, backgroundValueStride, threshold);
				return;
			}
			EdgeBackgroundAdjustRange<true>(backgroundCount, backgroundCountStride, width, height, backgroundValue, backgroundValueStride, threshold);
		}
예제 #18
0
        // Byte-oriented entry point that runs InterferenceChangeMasked on the statistic
        // buffer viewed as int16_t elements (statisticStride is halved from bytes to
        // elements). The kernel's second template argument is fixed to false here.
        void InterferenceDecrementMasked(uint8_t * statistic, size_t statisticStride, size_t width, size_t height,
            uint8_t decrement, int16_t saturation, const uint8_t * mask, size_t maskStride, uint8_t index)
        {
            assert(Aligned(statisticStride, 2));

            int16_t * const values = reinterpret_cast<int16_t *>(statistic);
            const size_t valueStride = statisticStride/2;

            const bool allAligned =
                Aligned(statistic) && Aligned(statisticStride) &&
                Aligned(mask) && Aligned(maskStride);

            if(allAligned)
            {
                InterferenceChangeMasked<true, false>(values, valueStride, width, height, decrement, saturation, mask, maskStride, index);
            }
            else
            {
                InterferenceChangeMasked<false, false>(values, valueStride, width, height, decrement, saturation, mask, maskStride, index);
            }
        }
예제 #19
0
        // Alignment dispatcher for HistogramMasked. Requires (in debug builds) that the
        // image is at least A wide; the kernel receives `histogram` as its output.
        void HistogramMasked(const uint8_t * src, size_t srcStride, size_t width, size_t height,
            const uint8_t * mask, size_t maskStride, uint8_t index, uint32_t * histogram)
        {
            assert(width >= A);

            if(!(Aligned(src) && Aligned(srcStride) && Aligned(mask) && Aligned(maskStride)))
            {
                HistogramMasked<false>(src, srcStride, width, height, mask, maskStride, index, histogram);
                return;
            }
            HistogramMasked<true>(src, srcStride, width, height, mask, maskStride, index, histogram);
        }
예제 #20
0
        // Alignment dispatcher for AbsSecondDerivativeHistogram. The debug assertion pins
        // the size preconditions: the image must extend beyond a border of `indent` on
        // every side, `indent` must be at least `step`, and the interior must be at
        // least A wide.
        void AbsSecondDerivativeHistogram(const uint8_t *src, size_t width, size_t height, size_t stride,
            size_t step, size_t indent, uint32_t * histogram)
        {
            assert(width > 2*indent && height > 2*indent && indent >= step && width >= A + 2*indent);

            const bool allAligned = Aligned(src) && Aligned(stride);
            if(allAligned)
            {
                AbsSecondDerivativeHistogram<true>(src, width, height, stride, step, indent, histogram);
            }
            else
            {
                AbsSecondDerivativeHistogram<false>(src, width, height, stride, step, indent, histogram);
            }
        }
예제 #21
0
        // Drives the per-block BgraToBayer kernel over a BGRA image, writing one Bayer
        // byte per pixel. Rows are processed in pairs: the kernel's second template
        // argument is 0 for the first row of each pair and 1 for the second.
        //
        // NOTE(review): `format` and `align` are not declared in this snippet - presumably
        // template parameters from a header line outside this view; confirm.
        // NOTE(review): the row loop advances by 2 and always processes two rows, so an
        // odd `height` would touch one row past the end; nothing here asserts that
        // `height` is even - confirm that callers guarantee it.
        void BgraToBayer(const uint8_t * bgra, size_t width, size_t height, size_t bgraStride, uint8_t * bayer, size_t bayerStride)
        {
            assert(width >= A);
            if(align)
                assert(Aligned(bgra) && Aligned(bgraStride) && Aligned(bayer) && Aligned(bayerStride));

            // Widest prefix of the row that is a whole number of A-sized blocks.
            size_t alignedWidth = AlignLo(width, A);

            // Permutation constants handed to the kernel.
            // NOTE(review): presumably indexed by Bayer layout / row parity inside the kernel - confirm.
            const v128_u8 perm[4][2] = 
            {
                {K8_PERM_GR, K8_PERM_BG}, 
                {K8_PERM_GB, K8_PERM_RG}, 
                {K8_PERM_RG, K8_PERM_GB}, 
                {K8_PERM_BG, K8_PERM_GR}
            };

            for(size_t row = 0; row < height; row += 2)
            {
                // First row of the pair: the first block is issued with the last template
                // argument true, the remaining blocks with false, then the storer is flushed.
                Loader<align> _bgra0(bgra);
                Storer<align> _bayer0(bayer);
                BgraToBayer<format, 0, align, true>(_bgra0, perm, _bayer0);
                for(size_t col = A; col < alignedWidth; col += A)
                    BgraToBayer<format, 0, align, false>(_bgra0, perm, _bayer0);
                Flush(_bayer0);

                // Tail: when width is not a multiple of A, redo the last A pixels of the row
                // with unaligned access (4 BGRA bytes per pixel on the source side).
                if(width != alignedWidth)
                {
                    Loader<false> _bgra(bgra + 4*(width - A));
                    Storer<false> _bayer(bayer + width - A);
                    BgraToBayer<format, 0, false, true>(_bgra, perm, _bayer);
                    Flush(_bayer);
                }

                bgra += bgraStride;
                bayer += bayerStride; 

                // Second row of the pair: same sequence with the row-parity argument set to 1.
                Loader<align> _bgra1(bgra);
                Storer<align> _bayer1(bayer);
                BgraToBayer<format, 1, align, true>(_bgra1, perm, _bayer1);
                for(size_t col = A; col < alignedWidth; col += A)
                    BgraToBayer<format, 1, align, false>(_bgra1, perm, _bayer1);
                Flush(_bayer1);

                if(width != alignedWidth)
                {
                    Loader<false> _bgra(bgra + 4*(width - A));
                    Storer<false> _bayer(bayer + width - A);
                    BgraToBayer<format, 1, false, true>(_bgra, perm, _bayer);
                    Flush(_bayer);
                }

                bgra += bgraStride;
                bayer += bayerStride; 
            }        
        }
예제 #22
0
		// Alignment dispatcher: the aligned kernel requires backgroundValue, backgroundCount
		// and mask (pointers and strides) to all pass Aligned().
		void EdgeBackgroundAdjustRangeMasked(uint8_t * backgroundCount, size_t backgroundCountStride, size_t width, size_t height,
			uint8_t * backgroundValue, size_t backgroundValueStride, uint8_t threshold, const uint8_t * mask, size_t maskStride)
		{
			const bool allAligned =
				Aligned(backgroundValue) && Aligned(backgroundValueStride) &&
				Aligned(backgroundCount) && Aligned(backgroundCountStride) &&
				Aligned(mask) && Aligned(maskStride);

			if (allAligned)
			{
				EdgeBackgroundAdjustRangeMasked<true>(backgroundCount, backgroundCountStride, width, height,
					backgroundValue, backgroundValueStride, threshold, mask, maskStride);
			}
			else
			{
				EdgeBackgroundAdjustRangeMasked<false>(backgroundCount, backgroundCountStride, width, height,
					backgroundValue, backgroundValueStride, threshold, mask, maskStride);
			}
		}
예제 #23
0
        // 3x3 blur kernel driver for rows of `step`-byte pixels.
        //
        // Each source row is first reduced by BlurCol into a 16-bit row buffer; BlurRow
        // then combines the three buffered rows (buffer.src0/src1/src2) into one output
        // row. The buffers are rotated after every output row so that only one new
        // source row has to be processed per iteration.
        //
        // Borders: the top border reuses row 0 (its BlurCol result is copied into src1),
        // and the bottom border reuses the last row (src2 is clamped to row height - 1).
        //
        // Preconditions (debug-checked): step*width >= A; for align == true all pointers,
        // strides and the row size in bytes must pass Aligned().
        template <bool align, size_t step> void GaussianBlur3x3(
            const uint8_t * src, size_t srcStride, size_t width, size_t height, uint8_t * dst, size_t dstStride)
        {
            assert(step*width >= A);
            if(align)
                assert(Aligned(src) && Aligned(srcStride) && Aligned(step*width) && Aligned(dst) && Aligned(dstStride));

            v128_u8 a[3];

            size_t size = step*width;
            // Everything before the last (possibly overlapping) A-byte block of the row.
            size_t bodySize = Simd::AlignHi(size, A) - A;

            Buffer buffer(Simd::AlignHi(size, A));

            // Prime src0 with row 0, then duplicate it into src1 to act as the row above row 0.
            LoadNose3<align, step>(src + 0, a);
            BlurCol<true>(a, buffer.src0 + 0);
            for(size_t col = A; col < bodySize; col += A)
            {
                LoadBody3<align, step>(src + col, a);
                BlurCol<true>(a, buffer.src0 + col);
            }
            // The tail block ends exactly at the row end, so it is aligned only when the row size is.
            LoadTail3<align, step>(src + size - A, a);
            BlurCol<align>(a, buffer.src0 + size - A);

            memcpy(buffer.src1, buffer.src0, sizeof(uint16_t)*size);

            for(size_t row = 0; row < height; ++row, dst += dstStride)
            {
                // Row below the current one, clamped to the last row. Written as
                // `row + 2 >= height` because `height - 2` wraps around for height < 2
                // (size_t), which would leave the pointer unclamped and read past the image.
                const uint8_t *src2 = src + srcStride*(row + 1);
                if(row + 2 >= height)
                    src2 = src + srcStride*(height - 1);

                LoadNose3<align, step>(src2 + 0, a);
                BlurCol<true>(a, buffer.src2 + 0);
                for(size_t col = A; col < bodySize; col += A)
                {
                    LoadBody3<align, step>(src2 + col, a);
                    BlurCol<true>(a, buffer.src2 + col);
                }
                LoadTail3<align, step>(src2 + size - A, a);
                BlurCol<align>(a, buffer.src2 + size - A);

                // Body blocks go through the streaming storer; the tail block is stored separately.
                Storer<align> _dst(dst);
                _dst.First(BlurRow<true>(buffer, 0));
                for(size_t col = A; col < bodySize; col += A)
                    _dst.Next(BlurRow<true>(buffer, col));
                Flush(_dst);
                Store<align>(dst + size - A, BlurRow<align>(buffer, size - A));

                // Rotate the buffers: src0 <- src1, src1 <- src2, src2 <- old src0 (to be overwritten).
                Swap(buffer.src0, buffer.src2);
                Swap(buffer.src0, buffer.src1);
            }
        }
        // Entry point for AlphaBlending. Three-channel images are delegated to
        // Base::AlphaBlending; every other channel count goes to the aligned or unaligned
        // kernel of this namespace, depending on src/alpha/dst alignment.
        void AlphaBlending(const uint8_t *src, size_t srcStride, size_t width, size_t height, size_t channelCount,
            const uint8_t *alpha, size_t alphaStride, uint8_t *dst, size_t dstStride)
        {
            if(channelCount == 3)
            {
                Base::AlphaBlending(src, srcStride, width, height, channelCount, alpha, alphaStride, dst, dstStride);
                return;
            }

            const bool allAligned =
                Aligned(src) && Aligned(srcStride) &&
                Aligned(alpha) && Aligned(alphaStride) &&
                Aligned(dst) && Aligned(dstStride);

            if(allAligned)
            {
                AlphaBlending<true>(src, srcStride, width, height, channelCount, alpha, alphaStride, dst, dstStride);
            }
            else
            {
                AlphaBlending<false>(src, srcStride, width, height, channelCount, alpha, alphaStride, dst, dstStride);
            }
        }
예제 #25
0
        // Runs the per-block BgrToYuv444p kernel over the whole image: A pixels per call,
        // i.e. 3*A bytes of packed BGR in and A bytes out to each of the y, u and v planes.
        // When width is not a multiple of A, the last A pixels of each row are processed
        // once more with the unaligned kernel.
        template <bool align> void BgrToYuv444p(const uint8_t * bgr, size_t width, size_t height, size_t bgrStride, uint8_t * y, size_t yStride,
            uint8_t * u, size_t uStride, uint8_t * v, size_t vStride)
        {
            assert(width >= A);
            if(align)
            {
                assert(Aligned(y) && Aligned(yStride) && Aligned(u) &&  Aligned(uStride));
                assert(Aligned(v) && Aligned(vStride) && Aligned(bgr) && Aligned(bgrStride));
            }

            const size_t bodyWidth = AlignLo(width, A);
            const size_t tailCol = width - A;
            const bool hasTail = (bodyWidth != width);

            for(size_t row = 0; row != height; ++row)
            {
                const uint8_t * bgrBlock = bgr;
                for(size_t col = 0; col < bodyWidth; col += A)
                {
                    BgrToYuv444p<align>(bgrBlock, y + col, u + col, v + col);
                    bgrBlock += A*3;
                }

                if(hasTail)
                    BgrToYuv444p<false>(bgr + tailCol*3, y + tailCol, u + tailCol, v + tailCol);

                bgr += bgrStride;
                y += yStride;
                u += uStride;
                v += vStride;
            }
        }
        template <bool align> void AddFeatureDifference(const uint8_t * value, size_t valueStride, size_t width, size_t height, 
            const uint8_t * lo, size_t loStride, const uint8_t * hi, size_t hiStride,
            uint16_t weight, uint8_t * difference, size_t differenceStride)
        {
            assert(width >= A);
            if(align)
            {
                assert(Aligned(value) && Aligned(valueStride));
                assert(Aligned(lo) && Aligned(loStride));
                assert(Aligned(hi) && Aligned(hiStride));
                assert(Aligned(difference) && Aligned(differenceStride));
            }

            size_t alignedWidth = AlignLo(width, A);
            __m128i tailMask = ShiftLeft(K_INV_ZERO, A - width + alignedWidth);
            __m128i _weight = _mm_set1_epi16((short)weight);

            for(size_t row = 0; row < height; ++row)
            {
                for(size_t col = 0; col < alignedWidth; col += A)
                    AddFeatureDifference<align>(value, lo, hi, difference, col, _weight, K_INV_ZERO);
                if(alignedWidth != width)
                    AddFeatureDifference<false>(value, lo, hi, difference, width - A, _weight, tailMask);
                value += valueStride;
                lo += loStride;
                hi += hiStride;
                difference += differenceStride;
            }
        }
예제 #27
0
        template <bool align> void BgrToYuv420p(const uint8_t * bgr, size_t width, size_t height, size_t bgrStride, uint8_t * y, size_t yStride,
            uint8_t * u, size_t uStride, uint8_t * v, size_t vStride)
        {
            assert((width%2 == 0) && (height%2 == 0) && (width >= DA) && (height >= 2));
            if(align)
            {
                assert(Aligned(y) && Aligned(yStride) && Aligned(u) &&  Aligned(uStride));
                assert(Aligned(v) && Aligned(vStride) && Aligned(bgr) && Aligned(bgrStride));
            }

            size_t alignedWidth = AlignLo(width, DA);
            const size_t A6 = A*6;
            for(size_t row = 0; row < height; row += 2)
            {
                for(size_t colUV = 0, colY = 0, colBgr = 0; colY < alignedWidth; colY += DA, colUV += A, colBgr += A6)
                    BgrToYuv420p<align>(bgr + colBgr, bgrStride, y + colY, yStride, u + colUV, v + colUV);
                if(width != alignedWidth)
                {
                    size_t offset = width - DA;
                    BgrToYuv420p<false>(bgr + offset*3, bgrStride, y + offset, yStride, u + offset/2, v + offset/2);
                }
                y += 2*yStride;
                u += uStride;
                v += vStride;
                bgr += 2*bgrStride;
            }
        }
예제 #28
0
        // 3x3 mean filter kernel driver for rows of `step`-byte pixels.
        //
        // Each source row is first reduced by SumCol into a 16-bit row buffer; AverageRow
        // then combines the three buffered rows (buffer.src0/src1/src2) into one output
        // row. The buffers are rotated after every output row so that only one new
        // source row has to be processed per iteration.
        //
        // Borders: the top border reuses row 0 (its SumCol result is copied into src1),
        // and the bottom border reuses the last row (src2 is clamped to row height - 1).
        //
        // The tail block is loaded from the last A bytes of the row but kept in the
        // buffer at offset bodySize, so all buffer accesses use the <true> variants.
        //
        // Preconditions (debug-checked): step*(width - 1) >= A; for align == true all
        // pointers, strides and the row size in bytes must pass Aligned().
        template <bool align, size_t step> void MeanFilter3x3(
            const uint8_t * src, size_t srcStride, size_t width, size_t height, uint8_t * dst, size_t dstStride)
        {
            assert(step*(width - 1) >= A);
            if(align)
                assert(Aligned(src) && Aligned(srcStride) && Aligned(step*width) && Aligned(dst) && Aligned(dstStride));

            __m256i a[3];

            size_t size = step*width;
            // Everything before the last (possibly overlapping) A-byte block of the row.
            size_t bodySize = Simd::AlignHi(size, A) - A;

            Buffer buffer(Simd::AlignHi(size, A));

            // Prime src0 with row 0, then duplicate it into src1 to act as the row above row 0.
            LoadNose3<align, step>(src + 0, a);
            SumCol<true>(a, buffer.src0 + 0);
            for(size_t col = A; col < bodySize; col += A)
            {
                LoadBody3<align, step>(src + col, a);
                SumCol<true>(a, buffer.src0 + col);
            }
            LoadTail3<align, step>(src + size - A, a);
            SumCol<true>(a, buffer.src0 + bodySize);

            memcpy(buffer.src1, buffer.src0, sizeof(uint16_t)*(bodySize + A));

            for(size_t row = 0; row < height; ++row, dst += dstStride)
            {
                // Row below the current one, clamped to the last row. Written as
                // `row + 2 >= height` because `height - 2` wraps around for height < 2
                // (size_t), which would leave the pointer unclamped and read past the image.
                const uint8_t *src2 = src + srcStride*(row + 1);
                if(row + 2 >= height)
                    src2 = src + srcStride*(height - 1);

                LoadNose3<align, step>(src2 + 0, a);
                SumCol<true>(a, buffer.src2 + 0);
                for(size_t col = A; col < bodySize; col += A)
                {
                    LoadBody3<align, step>(src2 + col, a);
                    SumCol<true>(a, buffer.src2 + col);
                }
                LoadTail3<align, step>(src2 + size - A, a);
                SumCol<true>(a, buffer.src2 + bodySize);

                // Body blocks first, then the tail block, which lands on the last A bytes of the row.
                for(size_t col = 0; col < bodySize; col += A)
                    Store<align>((__m256i*)(dst + col), AverageRow<true>(buffer, col));
                Store<align>((__m256i*)(dst + size - A), AverageRow<true>(buffer, bodySize));

                // Rotate the buffers: src0 <- src1, src1 <- src2, src2 <- old src0 (to be overwritten).
                Swap(buffer.src0, buffer.src2);
                Swap(buffer.src0, buffer.src1);
            }
        }
예제 #29
0
        template <bool align> void Bgr48pToBgra32(const uint8_t * blue, size_t blueStride, size_t width, size_t height,
            const uint8_t * green, size_t greenStride, const uint8_t * red, size_t redStride, uint8_t * bgra, size_t bgraStride, uint8_t alpha)
        {
            assert(width >= HA);
            if(align)
            {
                assert(Aligned(blue) && Aligned(blueStride));
                assert(Aligned(green) && Aligned(greenStride));
                assert(Aligned(red) && Aligned(redStride));
                assert(Aligned(bgra) && Aligned(bgraStride));
            }

            __m128i _alpha = _mm_slli_si128(_mm_set1_epi16(alpha), 1);
            size_t alignedWidth = AlignLo(width, HA);
            for(size_t row = 0; row < height; ++row)
            {
                for(size_t col = 0, srcOffset = 0, dstOffset = 0; col < alignedWidth; col += HA, srcOffset += A, dstOffset += DA)
                    Bgr48pToBgra32<align>(bgra + dstOffset, blue, green, red, srcOffset, _alpha);
                if(width != alignedWidth)
                    Bgr48pToBgra32<false>(bgra + (width - HA)*4, blue, green, red, (width - HA)*2, _alpha);
                blue += blueStride;
                green += greenStride;
                red += redStride;
                bgra += bgraStride;
            }
        }
예제 #30
0
        template <bool align> void Bgr48pToBgra32(const uint8_t * blue, size_t blueStride, size_t width, size_t height,
            const uint8_t * green, size_t greenStride, const uint8_t * red, size_t redStride, uint8_t * bgra, size_t bgraStride, uint8_t alpha)
        {
            assert(width >= HA);
            if(align)
            {
                assert(Aligned(blue) && Aligned(blueStride));
                assert(Aligned(green) && Aligned(greenStride));
                assert(Aligned(red) && Aligned(redStride));
                assert(Aligned(bgra) && Aligned(bgraStride));
            }

            v128_u8 _alpha = SetU8(alpha);
            size_t alignedWidth = AlignLo(width, HA);
            for(size_t row = 0; row < height; ++row)
            {
                Storer<align> _bgra(bgra);
                Bgr48pToBgra32<align, true>(blue, green, red, 0, _alpha, _bgra);
                for(size_t col = HA; col < alignedWidth; col += HA)
                    Bgr48pToBgra32<align, false>(blue, green, red, col*2, _alpha, _bgra);
                Flush(_bgra);

                if(width != alignedWidth)
                {
                    Storer<false> _bgra(bgra + (width - HA)*4);
                    Bgr48pToBgra32<false, true>(blue, green, red, (width - HA)*2, _alpha, _bgra);
                    Flush(_bgra);
                }

                blue += blueStride;
                green += greenStride;
                red += redStride;
                bgra += bgraStride;
            }
        }