// Adds the squared, masked difference between the vectors loaded at
// (src - step) and (src + step) to the running accumulator `sum`.
//
// src  - pointer to the centre position; the two loads are taken `step` bytes
//        before and after it.
// step - signed byte distance from the centre to each of the two loaded vectors.
// mask - ANDed with the difference before squaring, so bits cleared in the
//        mask are cleared in the value that gets squared.
// sum  - in/out accumulator; vec_msum multiplies corresponding bytes and adds
//        each group of four products to the matching 32-bit element.
//
// NOTE(review): `align` is not declared inside this block; presumably it is a
// template parameter declared immediately before this definition - confirm.
// NOTE(review): AbsDifferenceU8 is defined elsewhere; by its name it yields the
// per-byte absolute difference - confirm.
SIMD_INLINE void AddSquareDifference(const uint8_t * src, ptrdiff_t step, const v128_u8 & mask, v128_u32 & sum)
{
    const v128_u8 before = Load<align>(src - step);
    const v128_u8 after = Load<align>(src + step);
    const v128_u8 maskedDifference = vec_and(AbsDifferenceU8(before, after), mask);
    // Passing the same vector as both factors yields the sum of squares.
    sum = vec_msum(maskedDifference, maskedDifference, sum);
}
// Returns AbsDifferenceU8 between the rounded byte-wise average of the two
// neighbours at (src - step) and (src + step) and the centre vector at src.
//
// src  - pointer to the centre vector.
// step - signed byte distance from the centre to each neighbour.
//
// The neighbour loads use the aligned variant only when both srcAlign and
// stepAlign hold; the centre load depends on srcAlign alone.
//
// NOTE(review): `srcAlign` and `stepAlign` are not declared inside this block;
// presumably they are template parameters declared immediately before this
// definition - confirm.
SIMD_INLINE __m256i AbsSecondDerivative(const uint8_t * src, ptrdiff_t step)
{
    const __m256i before = Load<srcAlign && stepAlign>((__m256i*)(src - step));
    const __m256i centre = Load<srcAlign>((__m256i*)src);
    const __m256i after = Load<srcAlign && stepAlign>((__m256i*)(src + step));
    // _mm256_avg_epu8 computes (a + b + 1) >> 1 on each unsigned byte.
    const __m256i neighbourAverage = _mm256_avg_epu8(before, after);
    return AbsDifferenceU8(neighbourAverage, centre);
}
// Writes, for every interior pixel of an 8-bit image, the sum of the absolute
// central differences along x and y, clamped to 0xFF. The first and last rows
// and the first and last columns of dst are set to zero.
//
// src       - input image, one byte per pixel.
// srcStride - distance in bytes between consecutive rows of src.
// width     - number of pixels per row.
// height    - number of rows.
// dst       - output image, one byte per pixel.
// dstStride - distance in bytes between consecutive rows of dst.
//
// Degenerate sizes are handled explicitly:
//   * width == 0 or height == 0: nothing is written. Previously the bottom
//     border was written to a row that does not exist, and with width == 0
//     the expression `width - 1` wrapped around in size_t.
//   * height == 1: only the single row is zeroed. Previously the trailing
//     memset cleared `width` bytes of a second, non-existent row.
void AbsGradientSaturatedSum(const uint8_t * src, size_t srcStride, size_t width, size_t height, uint8_t * dst, size_t dstStride)
{
    if (width == 0 || height == 0)
        return;

    // Top border row.
    memset(dst, 0, width);
    if (height == 1)
        return; // the only row is also the bottom border; nothing else to write

    src += srcStride;
    dst += dstStride;
    for (size_t row = 2; row < height; ++row)
    {
        // Explicit neighbour-row pointers avoid indexing with a wrapped
        // unsigned value such as `col - srcStride`.
        const uint8_t * const above = src - srcStride;
        const uint8_t * const below = src + srcStride;

        dst[0] = 0;
        // `col + 1 < width` cannot wrap, unlike `col < width - 1`.
        for (size_t col = 1; col + 1 < width; ++col)
        {
            const int dy = AbsDifferenceU8(above[col], below[col]);
            const int dx = AbsDifferenceU8(src[col - 1], src[col + 1]);
            dst[col] = MinU8(dx + dy, 0xFF);
        }
        dst[width - 1] = 0;

        src += srcStride;
        dst += dstStride;
    }
    // Bottom border row (dst now points at row height - 1).
    memset(dst, 0, width);
}
// For every interior pixel (the one-pixel border is skipped) whose mask value
// equals `index`, accumulates AbsDifferenceU8 between the current pixel and
// each of the nine background pixels in the 3x3 window centred on the same
// position.
//
// current / currentStride       - current image and its row stride in bytes.
// background / backgroundStride - background image and its row stride in bytes.
// mask / maskStride             - mask image and its row stride in bytes.
// index                         - mask value selecting the pixels to include.
// width, height                 - image size; both must exceed 2 (asserted).
// sums                          - output array of 9 totals in row-major window
//                                 order: sums[0] is the top-left neighbour,
//                                 sums[4] the centre, sums[8] the bottom-right.
//
// Each row is first accumulated in `int` and then added to the 64-bit totals.
void AbsDifferenceSums3x3Masked(const uint8_t *current, size_t currentStride, const uint8_t *background, size_t backgroundStride,
    const uint8_t *mask, size_t maskStride, uint8_t index, size_t width, size_t height, uint64_t * sums)
{
    assert(width > 2 && height > 2);

    for(size_t i = 0; i < 9; ++i)
        sums[i] = 0;

    // Restrict processing to the interior so that every 3x3 window stays
    // inside the image.
    const size_t innerHeight = height - 2;
    const size_t innerWidth = width - 2;
    current += 1 + currentStride;
    background += 1 + backgroundStride;
    mask += 1 + maskStride;

    for(size_t row = 0; row < innerHeight; ++row)
    {
        int rowSums[9];
        for(size_t i = 0; i < 9; ++i)
            rowSums[i] = 0;

        // Start (left neighbour of column 0) of the three background rows
        // covered by the window: above, same row, below.
        const uint8_t * const windowRows[3] =
        {
            background - backgroundStride - 1,
            background - 1,
            background + backgroundStride - 1
        };

        for(size_t col = 0; col < innerWidth; ++col)
        {
            if(mask[col] != index)
                continue;

            const int value = current[col];
            for(size_t dy = 0; dy < 3; ++dy)
            {
                const uint8_t * const window = windowRows[dy] + col;
                for(size_t dx = 0; dx < 3; ++dx)
                    rowSums[3 * dy + dx] += AbsDifferenceU8(value, window[dx]);
            }
        }

        for(size_t i = 0; i < 9; ++i)
            sums[i] += rowSums[i];

        current += currentStride;
        background += backgroundStride;
        mask += maskStride;
    }
}
// Stores in *sum the total of AbsDifferenceU8(a, b) over all pixels of two
// 8-bit images of identical size.
//
// a / aStride   - first image and its row stride in bytes.
// b / bStride   - second image and its row stride in bytes.
// width, height - image size in pixels.
// sum           - output; reset to zero first, then increased once per row.
//
// Each row is accumulated in an `int` before being added to the 64-bit total.
void AbsDifferenceSum(const uint8_t *a, size_t aStride, const uint8_t *b, size_t bStride,
    size_t width, size_t height, uint64_t * sum)
{
    *sum = 0;
    for(size_t rowsLeft = height; rowsLeft != 0; --rowsLeft)
    {
        int rowTotal = 0;
        for(size_t x = 0; x != width; ++x)
            rowTotal += AbsDifferenceU8(a[x], b[x]);
        *sum += rowTotal;

        a += aStride;
        b += bStride;
    }
}
// Stores in *sum the total of AbsDifferenceU8(a, b) over those pixels whose
// mask value equals `index`.
//
// a / aStride       - first image and its row stride in bytes.
// b / bStride       - second image and its row stride in bytes.
// mask / maskStride - mask image and its row stride in bytes.
// index             - mask value selecting the pixels to include.
// width, height     - image size in pixels.
// sum               - output; reset to zero first, then increased once per row.
//
// Each row is accumulated in an `int` before being added to the 64-bit total.
void AbsDifferenceSumMasked(const uint8_t *a, size_t aStride, const uint8_t *b, size_t bStride,
    const uint8_t *mask, size_t maskStride, uint8_t index, size_t width, size_t height, uint64_t * sum)
{
    *sum = 0;
    for(size_t rowsLeft = height; rowsLeft != 0; --rowsLeft)
    {
        int rowTotal = 0;
        for(size_t x = 0; x != width; ++x)
        {
            if(mask[x] != index)
                continue;
            rowTotal += AbsDifferenceU8(a[x], b[x]);
        }
        *sum += rowTotal;

        a += aStride;
        b += bStride;
        mask += maskStride;
    }
}
template <bool align> void AbsDifferenceSumMasked(const uint8_t * a, const uint8_t *b, size_t offset, const v128_u8 & mask, v128_u32 & sum) { const v128_u8 _a = vec_and(Load<align>(a + offset), mask); const v128_u8 _b = vec_and(Load<align>(b + offset), mask); sum = vec_msum(AbsDifferenceU8(_a, _b), K8_01, sum); }
template <bool align> void AbsDifferenceSums3Masked(const v128_u8 & current, const uint8_t * background, const v128_u8 & mask, v128_u32 sums[3]) { sums[0] = vec_msum(AbsDifferenceU8(current, vec_and(mask, Load<align>(background - 1))), K8_01, sums[0]); sums[1] = vec_msum(AbsDifferenceU8(current, vec_and(mask, Load<false>(background))), K8_01, sums[1]); sums[2] = vec_msum(AbsDifferenceU8(current, vec_and(mask, Load<false>(background + 1))), K8_01, sums[2]); }
// Returns the saturating sum (vec_adds) of two AbsDifferenceU8 results for
// the vector of pixels starting at src: one between the vectors at src - 1
// and src + 1, the other between the vectors at src - stride and src + stride.
//
// align  - selects the Load variant for the two loads at +/- stride; the loads
//          at +/- 1 always use Load<false>.
// src    - pointer to the first pixel of the vector being processed.
// stride - byte offset between src and the two vectors loaded at +/- stride.
template<bool align> SIMD_INLINE v128_u8 AbsGradientSaturatedSum(const uint8_t * src, size_t stride)
{
    const v128_u8 left = Load<false>(src - 1);
    const v128_u8 right = Load<false>(src + 1);
    const v128_u8 horizontal = AbsDifferenceU8(left, right);

    const v128_u8 above = Load<align>(src - stride);
    const v128_u8 below = Load<align>(src + stride);
    const v128_u8 vertical = AbsDifferenceU8(above, below);

    return vec_adds(horizontal, vertical);
}