// Computes the variance of a 64x64 block by combining four 64x16 strips.
//
// Each strip is handed to variance_neon_w8 (defined elsewhere; presumably it
// yields the sum of squared differences and the sum of differences between
// a and b -- confirm against its definition). The per-strip results are
// accumulated, the total SSE is stored in *sse, and the return value is
// *sse minus the squared total sum divided by the pixel count.
// NOTE(review): the 16-row strip height looks like a limit imposed by the
// helper's internal accumulators -- confirm before changing it.
unsigned int vpx_variance64x64_neon(const uint8_t *a, int a_stride,
                                    const uint8_t *b, int b_stride,
                                    unsigned int *sse) {
  uint32_t total_sse = 0;
  int total_sum = 0;
  int strip;

  for (strip = 0; strip < 4; ++strip) {
    uint32_t strip_sse;
    int strip_sum;
    const int first_row = 16 * strip;

    variance_neon_w8(a + (first_row * a_stride), a_stride,
                     b + (first_row * b_stride), b_stride, 64, 16, &strip_sse,
                     &strip_sum);
    total_sse += strip_sse;
    total_sum += strip_sum;
  }

  *sse = total_sse;
  // >> 12 divides by the pixel count, 64 * 64 = 4096.
  return *sse - (((int64_t)total_sum * total_sum) >> 12);
}
// Computes the variance of a 32x64 block from its two 32x32 halves.
//
// variance_neon_w8 (defined elsewhere) is invoked once for rows 0..31 and
// once for rows 32..63. The two SSE outputs are added and stored in *sse;
// the two sum outputs are added and used for the correction term. The return
// value is *sse minus the squared total sum divided by the pixel count.
unsigned int vpx_variance32x64_neon(const uint8_t *a, int a_stride,
                                    const uint8_t *b, int b_stride,
                                    unsigned int *sse) {
  uint32_t top_sse, bottom_sse;
  int top_sum, bottom_sum;
  int total_sum;
  int64_t correction;

  variance_neon_w8(a, a_stride, b, b_stride, 32, 32, &top_sse, &top_sum);
  variance_neon_w8(a + (32 * a_stride), a_stride, b + (32 * b_stride),
                   b_stride, 32, 32, &bottom_sse, &bottom_sum);

  *sse = top_sse + bottom_sse;
  total_sum = top_sum + bottom_sum;

  // >> 11 divides by the pixel count, 32 * 64 = 2048.
  correction = ((int64_t)total_sum * total_sum) >> 11;
  return *sse - correction;
}
// File-local 8x8 variance helper.
//
// Forwards the block to variance_neon_w8 with the kWidth8 x kHeight8
// dimensions (both constants are defined elsewhere in this file), letting it
// fill *sse and a local sum. Returns *sse minus the squared sum divided by
// kWidth8 * kHeight8.
static unsigned int variance8x8_neon(const uint8_t *a, int a_stride,
                                     const uint8_t *b, int b_stride,
                                     unsigned int *sse) {
  int block_sum;
  int64_t sum_squared;

  variance_neon_w8(a, a_stride, b, b_stride, kWidth8, kHeight8, sse,
                   &block_sum);

  // Widen before multiplying so the square cannot overflow int.
  sum_squared = (int64_t)block_sum * block_sum;
  return *sse - (sum_squared / (kWidth8 * kHeight8));
}
// Computes the variance of a 32x32 block.
//
// variance_neon_w8 (defined elsewhere) fills *sse and a local sum for the
// whole block in a single call. Returns *sse minus the squared sum divided
// by the pixel count.
unsigned int vpx_variance32x32_neon(const uint8_t *a, int a_stride,
                                    const uint8_t *b, int b_stride,
                                    unsigned int *sse) {
  int block_sum;
  int64_t correction;

  variance_neon_w8(a, a_stride, b, b_stride, 32, 32, sse, &block_sum);

  // >> 10 divides by the pixel count, 32 * 32 = 1024.
  correction = ((int64_t)block_sum * block_sum) >> 10;
  return *sse - correction;
}
// Computes the variance of a 16x16 block.
//
// variance_neon_w8 (defined elsewhere) fills *sse and a local sum for the
// whole block in a single call. Returns *sse minus the squared sum divided
// by the pixel count.
unsigned int vpx_variance16x16_neon(const uint8_t *a, int a_stride,
                                    const uint8_t *b, int b_stride,
                                    unsigned int *sse) {
  int block_sum;
  int64_t correction;

  variance_neon_w8(a, a_stride, b, b_stride, 16, 16, sse, &block_sum);

  // >> 8 divides by the pixel count, 16 * 16 = 256.
  correction = ((int64_t)block_sum * block_sum) >> 8;
  return *sse - correction;
}
// Computes the variance of an 8x8 block.
//
// variance_neon_w8 (defined elsewhere) fills *sse and a local sum for the
// whole block in a single call. Returns *sse minus the squared sum divided
// by the pixel count.
unsigned int vpx_variance8x8_neon(const uint8_t *a, int a_stride,
                                  const uint8_t *b, int b_stride,
                                  unsigned int *sse) {
  int block_sum;
  int64_t correction;

  variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, &block_sum);

  // >> 6 divides by the pixel count, 8 * 8 = 64.
  correction = ((int64_t)block_sum * block_sum) >> 6;
  return *sse - correction;
}
// Forwards a 16x16 block to variance_neon_w8 (defined elsewhere), which
// writes its results through the caller-supplied sse and sum pointers.
// Nothing is computed or returned here beyond that call.
void vpx_get16x16var_neon(const uint8_t *a, int a_stride, const uint8_t *b,
                          int b_stride, unsigned int *sse, int *sum) {
  enum { kBlockDim = 16 };  // Width and height of the block.

  variance_neon_w8(a, a_stride, b, b_stride, kBlockDim, kBlockDim, sse, sum);
}