// Dispatches to the aligned or unaligned instantiation of Yuv444pToHue.
// The <true> variant is selected only when every plane pointer (y, u, v, hue) and
// every stride passes Aligned(); otherwise the <false> variant runs.
// All arguments are forwarded unchanged.
void Yuv444pToHue(const uint8_t * y, size_t yStride, const uint8_t * u, size_t uStride, const uint8_t * v, size_t vStride,
    size_t width, size_t height, uint8_t * hue, size_t hueStride)
{
    const bool allAligned =
        Aligned(y) && Aligned(yStride) &&
        Aligned(u) && Aligned(uStride) &&
        Aligned(v) && Aligned(vStride) &&
        Aligned(hue) && Aligned(hueStride);

    if (allAligned)
    {
        Yuv444pToHue<true>(y, yStride, u, uStride, v, vStride, width, height, hue, hueStride);
    }
    else
    {
        Yuv444pToHue<false>(y, yStride, u, uStride, v, vStride, width, height, hue, hueStride);
    }
}
// Dispatches to EdgeBackgroundShiftRangeMasked<true> when the value, background and mask
// pointers and their strides all pass Aligned(), and to EdgeBackgroundShiftRangeMasked<false>
// otherwise. All arguments are forwarded unchanged.
void EdgeBackgroundShiftRangeMasked(const uint8_t * value, size_t valueStride, size_t width, size_t height,
    uint8_t * background, size_t backgroundStride, const uint8_t * mask, size_t maskStride)
{
    const bool allAligned =
        Aligned(value) && Aligned(valueStride) &&
        Aligned(background) && Aligned(backgroundStride) &&
        Aligned(mask) && Aligned(maskStride);

    if (!allAligned)
    {
        EdgeBackgroundShiftRangeMasked<false>(value, valueStride, width, height, background, backgroundStride, mask, maskStride);
        return;
    }
    EdgeBackgroundShiftRangeMasked<true>(value, valueStride, width, height, background, backgroundStride, mask, maskStride);
}
// Dispatches to DeinterleaveUv<true> when the uv, u and v pointers and their strides
// all pass Aligned(), and to DeinterleaveUv<false> otherwise.
// All arguments are forwarded unchanged.
void DeinterleaveUv(const uint8_t * uv, size_t uvStride, size_t width, size_t height,
    uint8_t * u, size_t uStride, uint8_t * v, size_t vStride)
{
    const bool allAligned =
        Aligned(uv) && Aligned(uvStride) &&
        Aligned(u) && Aligned(uStride) &&
        Aligned(v) && Aligned(vStride);

    if (allAligned)
    {
        DeinterleaveUv<true>(uv, uvStride, width, height, u, uStride, v, vStride);
    }
    else
    {
        DeinterleaveUv<false>(uv, uvStride, width, height, u, uStride, v, vStride);
    }
}
// Dispatches to ReduceGray3x3<true> when the source and destination pointers and strides
// all pass Aligned(), and to ReduceGray3x3<false> otherwise.
// The image dimensions and `compensation` are not part of the alignment test;
// all arguments are forwarded unchanged.
void ReduceGray3x3(const uint8_t *src, size_t srcWidth, size_t srcHeight, size_t srcStride,
    uint8_t *dst, size_t dstWidth, size_t dstHeight, size_t dstStride, int compensation)
{
    const bool allAligned = Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride);

    if (!allAligned)
    {
        ReduceGray3x3<false>(src, srcWidth, srcHeight, srcStride, dst, dstWidth, dstHeight, dstStride, compensation);
        return;
    }
    ReduceGray3x3<true>(src, srcWidth, srcHeight, srcStride, dst, dstWidth, dstHeight, dstStride, compensation);
}
// Dispatches to TextureBoostedSaturatedGradient<true> when the src, dx and dy pointers and
// their strides all pass Aligned(), and to TextureBoostedSaturatedGradient<false> otherwise.
// `saturation` and `boost` are passed through untouched, as are all other arguments.
void TextureBoostedSaturatedGradient(const uint8_t * src, size_t srcStride, size_t width, size_t height,
    uint8_t saturation, uint8_t boost, uint8_t * dx, size_t dxStride, uint8_t * dy, size_t dyStride)
{
    const bool allAligned =
        Aligned(src) && Aligned(srcStride) &&
        Aligned(dx) && Aligned(dxStride) &&
        Aligned(dy) && Aligned(dyStride);

    if (allAligned)
    {
        TextureBoostedSaturatedGradient<true>(src, srcStride, width, height, saturation, boost, dx, dxStride, dy, dyStride);
    }
    else
    {
        TextureBoostedSaturatedGradient<false>(src, srcStride, width, height, saturation, boost, dx, dxStride, dy, dyStride);
    }
}
// Dispatches to EdgeBackgroundGrowRangeSlow<true> when the value and background pointers and
// their strides all pass Aligned(), and to EdgeBackgroundGrowRangeSlow<false> otherwise.
// All arguments are forwarded unchanged.
void EdgeBackgroundGrowRangeSlow(const uint8_t * value, size_t valueStride, size_t width, size_t height,
    uint8_t * background, size_t backgroundStride)
{
    const bool allAligned =
        Aligned(value) && Aligned(valueStride) &&
        Aligned(background) && Aligned(backgroundStride);

    if (!allAligned)
    {
        EdgeBackgroundGrowRangeSlow<false>(value, valueStride, width, height, background, backgroundStride);
        return;
    }
    EdgeBackgroundGrowRangeSlow<true>(value, valueStride, width, height, background, backgroundStride);
}
// Dispatches to AbsDifferenceSumMasked<true> when the a, b and mask pointers and their strides
// all pass Aligned(), and to AbsDifferenceSumMasked<false> otherwise.
// `index`, the dimensions and the `sum` output pointer are forwarded unchanged.
void AbsDifferenceSumMasked(const uint8_t *a, size_t aStride, const uint8_t *b, size_t bStride,
    const uint8_t *mask, size_t maskStride, uint8_t index, size_t width, size_t height, uint64_t * sum)
{
    const bool allAligned =
        Aligned(a) && Aligned(aStride) &&
        Aligned(b) && Aligned(bStride) &&
        Aligned(mask) && Aligned(maskStride);

    if (allAligned)
    {
        AbsDifferenceSumMasked<true>(a, aStride, b, bStride, mask, maskStride, index, width, height, sum);
    }
    else
    {
        AbsDifferenceSumMasked<false>(a, aStride, b, bStride, mask, maskStride, index, width, height, sum);
    }
}
// Dispatches to EdgeBackgroundIncrementCount<true> when the value, backgroundValue and
// backgroundCount pointers and their strides all pass Aligned(), and to
// EdgeBackgroundIncrementCount<false> otherwise. All arguments are forwarded unchanged.
void EdgeBackgroundIncrementCount(const uint8_t * value, size_t valueStride, size_t width, size_t height,
    const uint8_t * backgroundValue, size_t backgroundValueStride, uint8_t * backgroundCount, size_t backgroundCountStride)
{
    const bool allAligned =
        Aligned(value) && Aligned(valueStride) &&
        Aligned(backgroundValue) && Aligned(backgroundValueStride) &&
        Aligned(backgroundCount) && Aligned(backgroundCountStride);

    if (!allAligned)
    {
        EdgeBackgroundIncrementCount<false>(value, valueStride, width, height,
            backgroundValue, backgroundValueStride, backgroundCount, backgroundCountStride);
        return;
    }
    EdgeBackgroundIncrementCount<true>(value, valueStride, width, height,
        backgroundValue, backgroundValueStride, backgroundCount, backgroundCountStride);
}
// Dispatches to GaussianBlur3x3<true> or GaussianBlur3x3<false>.
// Besides the src/dst pointers and strides, the row length in bytes
// (channelCount*width) must also pass Aligned() for the <true> variant to be used.
// All arguments are forwarded unchanged.
void GaussianBlur3x3(const uint8_t * src, size_t srcStride, size_t width, size_t height,
    size_t channelCount, uint8_t * dst, size_t dstStride)
{
    const bool allAligned =
        Aligned(src) && Aligned(srcStride) &&
        Aligned(channelCount*width) &&
        Aligned(dst) && Aligned(dstStride);

    if (allAligned)
    {
        GaussianBlur3x3<true>(src, srcStride, width, height, channelCount, dst, dstStride);
    }
    else
    {
        GaussianBlur3x3<false>(src, srcStride, width, height, channelCount, dst, dstStride);
    }
}
// Dispatches to ConditionalSquareSum<true, compareType> when the src and mask pointers and
// their strides all pass Aligned(), and to ConditionalSquareSum<false, compareType> otherwise.
// NOTE(review): `compareType` is not declared inside this block; presumably it is a template
// parameter introduced by a `template <...>` header preceding this definition - confirm.
void ConditionalSquareSum(const uint8_t * src, size_t srcStride, size_t width, size_t height,
    const uint8_t * mask, size_t maskStride, uint8_t value, uint64_t * sum)
{
    const bool allAligned =
        Aligned(src) && Aligned(srcStride) &&
        Aligned(mask) && Aligned(maskStride);

    if (!allAligned)
    {
        ConditionalSquareSum<false, compareType>(src, srcStride, width, height, mask, maskStride, value, sum);
        return;
    }
    ConditionalSquareSum<true, compareType>(src, srcStride, width, height, mask, maskStride, value, sum);
}
// Dispatches to StretchGray2x2<true> when the source and destination pointers and strides
// all pass Aligned(), and to StretchGray2x2<false> otherwise.
// Source and destination dimensions are forwarded unchanged.
void StretchGray2x2(const uint8_t *src, size_t srcWidth, size_t srcHeight, size_t srcStride,
    uint8_t *dst, size_t dstWidth, size_t dstHeight, size_t dstStride)
{
    const bool allAligned = Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride);

    if (allAligned)
    {
        StretchGray2x2<true>(src, srcWidth, srcHeight, srcStride, dst, dstWidth, dstHeight, dstStride);
    }
    else
    {
        StretchGray2x2<false>(src, srcWidth, srcHeight, srcStride, dst, dstWidth, dstHeight, dstStride);
    }
}
// Dispatches to AbsDifferenceSum<true> when both image pointers and both strides pass
// Aligned(), and to AbsDifferenceSum<false> otherwise.
// The dimensions and the `sum` output pointer are forwarded unchanged.
void AbsDifferenceSum(const uint8_t *a, size_t aStride, const uint8_t *b, size_t bStride,
    size_t width, size_t height, uint64_t * sum)
{
    const bool allAligned = Aligned(a) && Aligned(aStride) && Aligned(b) && Aligned(bStride);

    if (!allAligned)
    {
        AbsDifferenceSum<false>(a, aStride, b, bStride, width, height, sum);
        return;
    }
    AbsDifferenceSum<true>(a, aStride, b, bStride, width, height, sum);
}
// Dispatches to AbsDifferenceSums3x3Masked<true> or AbsDifferenceSums3x3Masked<false>.
// Only `background` and `backgroundStride` take part in the alignment test.
// NOTE(review): `current` and `mask` are deliberately not tested here; presumably the
// templated implementation always reads them with unaligned loads - confirm.
void AbsDifferenceSums3x3Masked(const uint8_t *current, size_t currentStride, const uint8_t *background, size_t backgroundStride,
    const uint8_t *mask, size_t maskStride, uint8_t index, size_t width, size_t height, uint64_t * sums)
{
    const bool backgroundAligned = Aligned(background) && Aligned(backgroundStride);

    if (backgroundAligned)
    {
        AbsDifferenceSums3x3Masked<true>(current, currentStride, background, backgroundStride,
            mask, maskStride, index, width, height, sums);
    }
    else
    {
        AbsDifferenceSums3x3Masked<false>(current, currentStride, background, backgroundStride,
            mask, maskStride, index, width, height, sums);
    }
}
// Dispatches to AbsDifferenceSums3x3<true> or AbsDifferenceSums3x3<false>.
// Only `background` and `backgroundStride` take part in the alignment test.
// NOTE(review): `current` is deliberately not tested here; presumably the templated
// implementation always reads it with unaligned loads - confirm.
void AbsDifferenceSums3x3(const uint8_t * current, size_t currentStride, const uint8_t * background, size_t backgroundStride,
    size_t width, size_t height, uint64_t * sums)
{
    const bool backgroundAligned = Aligned(background) && Aligned(backgroundStride);

    if (!backgroundAligned)
    {
        AbsDifferenceSums3x3<false>(current, currentStride, background, backgroundStride, width, height, sums);
        return;
    }
    AbsDifferenceSums3x3<true>(current, currentStride, background, backgroundStride, width, height, sums);
}
// Dispatches to Laplace<true, false> or Laplace<false, false> depending on whether the
// src/dst pointers and strides all pass Aligned().
// The byte-addressed destination is handed to the implementation as int16_t elements:
// the pointer is reinterpreted and the stride is converted from bytes to int16_t units,
// which is why dstStride must be a multiple of sizeof(int16_t) (asserted).
void Laplace(const uint8_t * src, size_t srcStride, size_t width, size_t height, uint8_t * dst, size_t dstStride)
{
    assert(dstStride%sizeof(int16_t) == 0);

    const bool allAligned = Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride);

    if (allAligned)
    {
        Laplace<true, false>(src, srcStride, width, height, reinterpret_cast<int16_t *>(dst), dstStride/sizeof(int16_t));
    }
    else
    {
        Laplace<false, false>(src, srcStride, width, height, reinterpret_cast<int16_t *>(dst), dstStride/sizeof(int16_t));
    }
}
// Forwards to InterferenceChange<align, false>, choosing align = true only when the
// statistic pointer and its stride pass Aligned().
// The statistic buffer is handed over as int16_t elements: the pointer is reinterpreted and
// the byte stride is halved, so the stride must be a multiple of 2 (asserted).
void InterferenceDecrement(uint8_t * statistic, size_t stride, size_t width, size_t height, uint8_t decrement, int16_t saturation)
{
    assert(Aligned(stride, 2));

    const bool allAligned = Aligned(statistic) && Aligned(stride);

    if (!allAligned)
    {
        InterferenceChange<false, false>(reinterpret_cast<int16_t*>(statistic), stride/2, width, height, decrement, saturation);
        return;
    }
    InterferenceChange<true, false>(reinterpret_cast<int16_t*>(statistic), stride/2, width, height, decrement, saturation);
}
// Dispatches to EdgeBackgroundAdjustRange<true> when the backgroundValue and backgroundCount
// pointers and their strides all pass Aligned(), and to EdgeBackgroundAdjustRange<false>
// otherwise. All arguments, including `threshold`, are forwarded unchanged.
void EdgeBackgroundAdjustRange(uint8_t * backgroundCount, size_t backgroundCountStride, size_t width, size_t height,
    uint8_t * backgroundValue, size_t backgroundValueStride, uint8_t threshold)
{
    const bool allAligned =
        Aligned(backgroundValue) && Aligned(backgroundValueStride) &&
        Aligned(backgroundCount) && Aligned(backgroundCountStride);

    if (allAligned)
    {
        EdgeBackgroundAdjustRange<true>(backgroundCount, backgroundCountStride, width, height,
            backgroundValue, backgroundValueStride, threshold);
    }
    else
    {
        EdgeBackgroundAdjustRange<false>(backgroundCount, backgroundCountStride, width, height,
            backgroundValue, backgroundValueStride, threshold);
    }
}
// Forwards to InterferenceChangeMasked<align, false>, choosing align = true only when the
// statistic and mask pointers and their strides all pass Aligned().
// The statistic buffer is handed over as int16_t elements: the pointer is reinterpreted and
// the byte stride is halved, so statisticStride must be a multiple of 2 (asserted).
void InterferenceDecrementMasked(uint8_t * statistic, size_t statisticStride, size_t width, size_t height,
    uint8_t decrement, int16_t saturation, const uint8_t * mask, size_t maskStride, uint8_t index)
{
    assert(Aligned(statisticStride, 2));

    const bool allAligned =
        Aligned(statistic) && Aligned(statisticStride) &&
        Aligned(mask) && Aligned(maskStride);

    if (!allAligned)
    {
        InterferenceChangeMasked<false, false>(reinterpret_cast<int16_t*>(statistic), statisticStride/2, width, height,
            decrement, saturation, mask, maskStride, index);
        return;
    }
    InterferenceChangeMasked<true, false>(reinterpret_cast<int16_t*>(statistic), statisticStride/2, width, height,
        decrement, saturation, mask, maskStride, index);
}
// Dispatches to HistogramMasked<true> when the src and mask pointers and their strides all
// pass Aligned(), and to HistogramMasked<false> otherwise.
// Precondition (asserted): width >= A.
void HistogramMasked(const uint8_t * src, size_t srcStride, size_t width, size_t height,
    const uint8_t * mask, size_t maskStride, uint8_t index, uint32_t * histogram)
{
    assert(width >= A);

    const bool allAligned =
        Aligned(src) && Aligned(srcStride) &&
        Aligned(mask) && Aligned(maskStride);

    if (allAligned)
    {
        HistogramMasked<true>(src, srcStride, width, height, mask, maskStride, index, histogram);
    }
    else
    {
        HistogramMasked<false>(src, srcStride, width, height, mask, maskStride, index, histogram);
    }
}
// Dispatches to AbsSecondDerivativeHistogram<true> when `src` and `stride` pass Aligned(),
// and to AbsSecondDerivativeHistogram<false> otherwise.
// Preconditions (asserted): width and height both exceed 2*indent, indent >= step,
// and width >= A + 2*indent.
void AbsSecondDerivativeHistogram(const uint8_t *src, size_t width, size_t height, size_t stride,
    size_t step, size_t indent, uint32_t * histogram)
{
    assert(width > 2*indent && height > 2*indent && indent >= step && width >= A + 2*indent);

    const bool allAligned = Aligned(src) && Aligned(stride);

    if (!allAligned)
    {
        AbsSecondDerivativeHistogram<false>(src, width, height, stride, step, indent, histogram);
        return;
    }
    AbsSecondDerivativeHistogram<true>(src, width, height, stride, step, indent, histogram);
}
void BgraToBayer(const uint8_t * bgra, size_t width, size_t height, size_t bgraStride, uint8_t * bayer, size_t bayerStride) { assert(width >= A); if(align) assert(Aligned(bgra) && Aligned(bgraStride) && Aligned(bayer) && Aligned(bayerStride)); size_t alignedWidth = AlignLo(width, A); const v128_u8 perm[4][2] = { {K8_PERM_GR, K8_PERM_BG}, {K8_PERM_GB, K8_PERM_RG}, {K8_PERM_RG, K8_PERM_GB}, {K8_PERM_BG, K8_PERM_GR} }; for(size_t row = 0; row < height; row += 2) { Loader<align> _bgra0(bgra); Storer<align> _bayer0(bayer); BgraToBayer<format, 0, align, true>(_bgra0, perm, _bayer0); for(size_t col = A; col < alignedWidth; col += A) BgraToBayer<format, 0, align, false>(_bgra0, perm, _bayer0); Flush(_bayer0); if(width != alignedWidth) { Loader<false> _bgra(bgra + 4*(width - A)); Storer<false> _bayer(bayer + width - A); BgraToBayer<format, 0, false, true>(_bgra, perm, _bayer); Flush(_bayer); } bgra += bgraStride; bayer += bayerStride; Loader<align> _bgra1(bgra); Storer<align> _bayer1(bayer); BgraToBayer<format, 1, align, true>(_bgra1, perm, _bayer1); for(size_t col = A; col < alignedWidth; col += A) BgraToBayer<format, 1, align, false>(_bgra1, perm, _bayer1); Flush(_bayer1); if(width != alignedWidth) { Loader<false> _bgra(bgra + 4*(width - A)); Storer<false> _bayer(bayer + width - A); BgraToBayer<format, 1, false, true>(_bgra, perm, _bayer); Flush(_bayer); } bgra += bgraStride; bayer += bayerStride; } }
// Dispatches to EdgeBackgroundAdjustRangeMasked<true> when the backgroundValue,
// backgroundCount and mask pointers and their strides all pass Aligned(), and to
// EdgeBackgroundAdjustRangeMasked<false> otherwise. All arguments are forwarded unchanged.
void EdgeBackgroundAdjustRangeMasked(uint8_t * backgroundCount, size_t backgroundCountStride, size_t width, size_t height,
    uint8_t * backgroundValue, size_t backgroundValueStride, uint8_t threshold, const uint8_t * mask, size_t maskStride)
{
    const bool allAligned =
        Aligned(backgroundValue) && Aligned(backgroundValueStride) &&
        Aligned(backgroundCount) && Aligned(backgroundCountStride) &&
        Aligned(mask) && Aligned(maskStride);

    if (allAligned)
    {
        EdgeBackgroundAdjustRangeMasked<true>(backgroundCount, backgroundCountStride, width, height,
            backgroundValue, backgroundValueStride, threshold, mask, maskStride);
    }
    else
    {
        EdgeBackgroundAdjustRangeMasked<false>(backgroundCount, backgroundCountStride, width, height,
            backgroundValue, backgroundValueStride, threshold, mask, maskStride);
    }
}
template <bool align, size_t step> void GaussianBlur3x3( const uint8_t * src, size_t srcStride, size_t width, size_t height, uint8_t * dst, size_t dstStride) { assert(step*width >= A); if(align) assert(Aligned(src) && Aligned(srcStride) && Aligned(step*width) && Aligned(dst) && Aligned(dstStride)); v128_u8 a[3]; size_t size = step*width; size_t bodySize = Simd::AlignHi(size, A) - A; Buffer buffer(Simd::AlignHi(size, A)); LoadNose3<align, step>(src + 0, a); BlurCol<true>(a, buffer.src0 + 0); for(size_t col = A; col < bodySize; col += A) { LoadBody3<align, step>(src + col, a); BlurCol<true>(a, buffer.src0 + col); } LoadTail3<align, step>(src + size - A, a); BlurCol<align>(a, buffer.src0 + size - A); memcpy(buffer.src1, buffer.src0, sizeof(uint16_t)*size); for(size_t row = 0; row < height; ++row, dst += dstStride) { const uint8_t *src2 = src + srcStride*(row + 1); if(row >= height - 2) src2 = src + srcStride*(height - 1); LoadNose3<align, step>(src2 + 0, a); BlurCol<true>(a, buffer.src2 + 0); for(size_t col = A; col < bodySize; col += A) { LoadBody3<align, step>(src2 + col, a); BlurCol<true>(a, buffer.src2 + col); } LoadTail3<align, step>(src2 + size - A, a); BlurCol<align>(a, buffer.src2 + size - A); Storer<align> _dst(dst); _dst.First(BlurRow<true>(buffer, 0)); for(size_t col = A; col < bodySize; col += A) _dst.Next(BlurRow<true>(buffer, col)); Flush(_dst); Store<align>(dst + size - A, BlurRow<align>(buffer, size - A)); Swap(buffer.src0, buffer.src2); Swap(buffer.src0, buffer.src1); } }
// Alpha-blending entry point.
// Images with exactly 3 channels are delegated to Base::AlphaBlending. Every other channel
// count goes to AlphaBlending<true> when the src, alpha and dst pointers and their strides
// all pass Aligned(), and to AlphaBlending<false> otherwise.
void AlphaBlending(const uint8_t *src, size_t srcStride, size_t width, size_t height, size_t channelCount,
    const uint8_t *alpha, size_t alphaStride, uint8_t *dst, size_t dstStride)
{
    if (channelCount == 3)
    {
        Base::AlphaBlending(src, srcStride, width, height, channelCount, alpha, alphaStride, dst, dstStride);
        return;
    }

    const bool allAligned =
        Aligned(src) && Aligned(srcStride) &&
        Aligned(alpha) && Aligned(alphaStride) &&
        Aligned(dst) && Aligned(dstStride);

    if (allAligned)
    {
        AlphaBlending<true>(src, srcStride, width, height, channelCount, alpha, alphaStride, dst, dstStride);
    }
    else
    {
        AlphaBlending<false>(src, srcStride, width, height, channelCount, alpha, alphaStride, dst, dstStride);
    }
}
template <bool align> void BgrToYuv444p(const uint8_t * bgr, size_t width, size_t height, size_t bgrStride, uint8_t * y, size_t yStride, uint8_t * u, size_t uStride, uint8_t * v, size_t vStride) { assert(width >= A); if(align) { assert(Aligned(y) && Aligned(yStride) && Aligned(u) && Aligned(uStride)); assert(Aligned(v) && Aligned(vStride) && Aligned(bgr) && Aligned(bgrStride)); } size_t alignedWidth = AlignLo(width, A); const size_t A3 = A*3; for(size_t row = 0; row < height; ++row) { for(size_t col = 0, colBgr = 0; col < alignedWidth; col += A, colBgr += A3) BgrToYuv444p<align>(bgr + colBgr, y + col, u + col, v + col); if(width != alignedWidth) { size_t col = width - A; BgrToYuv444p<false>(bgr + col*3, y + col, u + col, v + col); } y += yStride; u += uStride; v += vStride; bgr += bgrStride; } }
template <bool align> void AddFeatureDifference(const uint8_t * value, size_t valueStride, size_t width, size_t height, const uint8_t * lo, size_t loStride, const uint8_t * hi, size_t hiStride, uint16_t weight, uint8_t * difference, size_t differenceStride) { assert(width >= A); if(align) { assert(Aligned(value) && Aligned(valueStride)); assert(Aligned(lo) && Aligned(loStride)); assert(Aligned(hi) && Aligned(hiStride)); assert(Aligned(difference) && Aligned(differenceStride)); } size_t alignedWidth = AlignLo(width, A); __m128i tailMask = ShiftLeft(K_INV_ZERO, A - width + alignedWidth); __m128i _weight = _mm_set1_epi16((short)weight); for(size_t row = 0; row < height; ++row) { for(size_t col = 0; col < alignedWidth; col += A) AddFeatureDifference<align>(value, lo, hi, difference, col, _weight, K_INV_ZERO); if(alignedWidth != width) AddFeatureDifference<false>(value, lo, hi, difference, width - A, _weight, tailMask); value += valueStride; lo += loStride; hi += hiStride; difference += differenceStride; } }
template <bool align> void BgrToYuv420p(const uint8_t * bgr, size_t width, size_t height, size_t bgrStride, uint8_t * y, size_t yStride, uint8_t * u, size_t uStride, uint8_t * v, size_t vStride) { assert((width%2 == 0) && (height%2 == 0) && (width >= DA) && (height >= 2)); if(align) { assert(Aligned(y) && Aligned(yStride) && Aligned(u) && Aligned(uStride)); assert(Aligned(v) && Aligned(vStride) && Aligned(bgr) && Aligned(bgrStride)); } size_t alignedWidth = AlignLo(width, DA); const size_t A6 = A*6; for(size_t row = 0; row < height; row += 2) { for(size_t colUV = 0, colY = 0, colBgr = 0; colY < alignedWidth; colY += DA, colUV += A, colBgr += A6) BgrToYuv420p<align>(bgr + colBgr, bgrStride, y + colY, yStride, u + colUV, v + colUV); if(width != alignedWidth) { size_t offset = width - DA; BgrToYuv420p<false>(bgr + offset*3, bgrStride, y + offset, yStride, u + offset/2, v + offset/2); } y += 2*yStride; u += uStride; v += vStride; bgr += 2*bgrStride; } }
template <bool align, size_t step> void MeanFilter3x3( const uint8_t * src, size_t srcStride, size_t width, size_t height, uint8_t * dst, size_t dstStride) { assert(step*(width - 1) >= A); if(align) assert(Aligned(src) && Aligned(srcStride) && Aligned(step*width) && Aligned(dst) && Aligned(dstStride)); __m256i a[3]; size_t size = step*width; size_t bodySize = Simd::AlignHi(size, A) - A; Buffer buffer(Simd::AlignHi(size, A)); LoadNose3<align, step>(src + 0, a); SumCol<true>(a, buffer.src0 + 0); for(size_t col = A; col < bodySize; col += A) { LoadBody3<align, step>(src + col, a); SumCol<true>(a, buffer.src0 + col); } LoadTail3<align, step>(src + size - A, a); SumCol<true>(a, buffer.src0 + bodySize); memcpy(buffer.src1, buffer.src0, sizeof(uint16_t)*(bodySize + A)); for(size_t row = 0; row < height; ++row, dst += dstStride) { const uint8_t *src2 = src + srcStride*(row + 1); if(row >= height - 2) src2 = src + srcStride*(height - 1); LoadNose3<align, step>(src2 + 0, a); SumCol<true>(a, buffer.src2 + 0); for(size_t col = A; col < bodySize; col += A) { LoadBody3<align, step>(src2 + col, a); SumCol<true>(a, buffer.src2 + col); } LoadTail3<align, step>(src2 + size - A, a); SumCol<true>(a, buffer.src2 + bodySize); for(size_t col = 0; col < bodySize; col += A) Store<align>((__m256i*)(dst + col), AverageRow<true>(buffer, col)); Store<align>((__m256i*)(dst + size - A), AverageRow<true>(buffer, bodySize)); Swap(buffer.src0, buffer.src2); Swap(buffer.src0, buffer.src1); } }
template <bool align> void Bgr48pToBgra32(const uint8_t * blue, size_t blueStride, size_t width, size_t height, const uint8_t * green, size_t greenStride, const uint8_t * red, size_t redStride, uint8_t * bgra, size_t bgraStride, uint8_t alpha) { assert(width >= HA); if(align) { assert(Aligned(blue) && Aligned(blueStride)); assert(Aligned(green) && Aligned(greenStride)); assert(Aligned(red) && Aligned(redStride)); assert(Aligned(bgra) && Aligned(bgraStride)); } __m128i _alpha = _mm_slli_si128(_mm_set1_epi16(alpha), 1); size_t alignedWidth = AlignLo(width, HA); for(size_t row = 0; row < height; ++row) { for(size_t col = 0, srcOffset = 0, dstOffset = 0; col < alignedWidth; col += HA, srcOffset += A, dstOffset += DA) Bgr48pToBgra32<align>(bgra + dstOffset, blue, green, red, srcOffset, _alpha); if(width != alignedWidth) Bgr48pToBgra32<false>(bgra + (width - HA)*4, blue, green, red, (width - HA)*2, _alpha); blue += blueStride; green += greenStride; red += redStride; bgra += bgraStride; } }
template <bool align> void Bgr48pToBgra32(const uint8_t * blue, size_t blueStride, size_t width, size_t height, const uint8_t * green, size_t greenStride, const uint8_t * red, size_t redStride, uint8_t * bgra, size_t bgraStride, uint8_t alpha) { assert(width >= HA); if(align) { assert(Aligned(blue) && Aligned(blueStride)); assert(Aligned(green) && Aligned(greenStride)); assert(Aligned(red) && Aligned(redStride)); assert(Aligned(bgra) && Aligned(bgraStride)); } v128_u8 _alpha = SetU8(alpha); size_t alignedWidth = AlignLo(width, HA); for(size_t row = 0; row < height; ++row) { Storer<align> _bgra(bgra); Bgr48pToBgra32<align, true>(blue, green, red, 0, _alpha, _bgra); for(size_t col = HA; col < alignedWidth; col += HA) Bgr48pToBgra32<align, false>(blue, green, red, col*2, _alpha, _bgra); Flush(_bgra); if(width != alignedWidth) { Storer<false> _bgra(bgra + (width - HA)*4); Bgr48pToBgra32<false, true>(blue, green, red, (width - HA)*2, _alpha, _bgra); Flush(_bgra); } blue += blueStride; green += greenStride; red += redStride; bgra += bgraStride; } }