template <size_t channelCount> void ResizeBilinear( const uint8_t *src, size_t srcWidth, size_t srcHeight, size_t srcStride, uint8_t *dst, size_t dstWidth, size_t dstHeight, size_t dstStride) { assert(dstWidth >= A); struct One { uint8_t channels[channelCount]; }; struct Two { uint8_t channels[channelCount*2]; }; size_t size = 2*dstWidth*channelCount; size_t bufferSize = AlignHi(dstWidth, A)*channelCount*2; size_t alignedSize = AlignHi(size, DA) - DA; const size_t step = A*channelCount; Buffer buffer(bufferSize, dstWidth, dstHeight); Base::EstimateAlphaIndex(srcHeight, dstHeight, buffer.iy, buffer.ay, 1); EstimateAlphaIndexX<channelCount>(srcWidth, dstWidth, buffer.ix, buffer.ax); ptrdiff_t previous = -2; __m256i a[2]; for(size_t yDst = 0; yDst < dstHeight; yDst++, dst += dstStride) { a[0] = _mm256_set1_epi16(int16_t(Base::FRACTION_RANGE - buffer.ay[yDst])); a[1] = _mm256_set1_epi16(int16_t(buffer.ay[yDst])); ptrdiff_t sy = buffer.iy[yDst]; int k = 0; if(sy == previous) k = 2; else if(sy == previous + 1) { Swap(buffer.bx[0], buffer.bx[1]); k = 1; } previous = sy; for(; k < 2; k++) { Two * pb = (Two *)buffer.bx[k]; const One * psrc = (const One *)(src + (sy + k)*srcStride); for(size_t x = 0; x < dstWidth; x++) pb[x] = *(Two *)(psrc + buffer.ix[x]); uint8_t * pbx = buffer.bx[k]; for(size_t i = 0; i < bufferSize; i += step) InterpolateX<channelCount>((__m256i*)(buffer.ax + i), (__m256i*)(pbx + i)); } for(size_t ib = 0, id = 0; ib < alignedSize; ib += DA, id += A) InterpolateY<true>(buffer.bx[0] + ib, buffer.bx[1] + ib, a, dst + id); size_t i = size - DA; InterpolateY<false>(buffer.bx[0] + i, buffer.bx[1] + i, a, dst + i/2); } }
template<bool align> void AbsSecondDerivativeHistogram(const uint8_t *src, size_t width, size_t height, size_t stride, size_t step, size_t indent, uint32_t * histogram) { memset(histogram, 0, sizeof(uint32_t)*HISTOGRAM_SIZE); Buffer buffer(stride); buffer.p += indent; src += indent*(stride + 1); height -= 2*indent; width -= 2*indent; ptrdiff_t bodyStart = (uint8_t*)AlignHi(buffer.p, A) - buffer.p; ptrdiff_t bodyEnd = bodyStart + AlignLo(width - bodyStart, A); size_t rowStep = step*stride; for(size_t row = 0; row < height; ++row) { if(bodyStart) AbsSecondDerivative<false>(src, step, rowStep, buffer.p); for(ptrdiff_t col = bodyStart; col < bodyEnd; col += A) AbsSecondDerivative<align>(src + col, step, rowStep, buffer.p + col); if(width != (size_t)bodyEnd) AbsSecondDerivative<false>(src + width - A, step, rowStep, buffer.p + width - A); for(size_t i = 0; i < width; ++i) ++histogram[buffer.p[i]]; src += stride; } }
/**
 * Computes a 32-bit checksum over `size` bytes starting at `src`
 * (presumably CRC-32C, judging by the name; the actual update step lives in
 * the three-argument Crc32c helpers defined elsewhere).
 *
 * The running value starts at 0xFFFFFFFF and the result is bitwise inverted.
 * The input is split into an unaligned head processed bytewise, a body of
 * size_t-aligned words, and a bytewise tail.
 *
 * Short unaligned inputs that end before the first size_t-aligned address are
 * processed bytewise as a whole. Without that guard the head pass would run
 * up to the aligned boundary, past the end of the input, and the tail pass
 * would start before its beginning.
 *
 * \param src  pointer to the data (may be unaligned).
 * \param size number of bytes to process; 0 yields the checksum of empty input.
 * \return the finalized 32-bit checksum.
 */
uint32_t Crc32c(const void *src, size_t size)
{
    uint8_t * nose = (uint8_t*)src;
    uint8_t * end = nose + size;
    uint8_t * body = (uint8_t*)AlignHi(nose, sizeof(size_t));

    size_t crc = 0xFFFFFFFF;

    // By default everything is treated as "tail" and handled byte by byte.
    uint8_t * tail = nose;
    if(body < end)
    {
        // At least one aligned address lies inside the input, so
        // nose <= body <= AlignLo(end) <= end and every sub-range is valid.
        tail = (uint8_t*)AlignLo(end, sizeof(size_t));
        Crc32c(crc, nose, body);
        Crc32c(crc, (size_t*)body, (size_t*)tail);
    }
    Crc32c(crc, tail, end);

    return ~(uint32_t)crc;
}
template<bool align> void AbsSecondDerivativeHistogram(const uint8_t *src, size_t width, size_t height, size_t stride, size_t step, size_t indent, uint32_t * histogram) { Buffer<uint8_t> buffer(AlignHi(width, A), HISTOGRAM_SIZE); buffer.v += indent; src += indent*(stride + 1); height -= 2*indent; width -= 2*indent; ptrdiff_t bodyStart = (uint8_t*)AlignHi(buffer.v, A) - buffer.v; ptrdiff_t bodyEnd = bodyStart + AlignLo(width - bodyStart, A); size_t rowStep = step*stride; size_t alignedWidth = Simd::AlignLo(width, 4); for(size_t row = 0; row < height; ++row) { if(bodyStart) AbsSecondDerivative<false>(src, step, rowStep, buffer.v); for(ptrdiff_t col = bodyStart; col < bodyEnd; col += A) AbsSecondDerivative<align>(src + col, step, rowStep, buffer.v + col); if(width != (size_t)bodyEnd) AbsSecondDerivative<false>(src + width - A, step, rowStep, buffer.v + width - A); size_t col = 0; for(; col < alignedWidth; col += 4) { ++buffer.h[0][buffer.v[col + 0]]; ++buffer.h[1][buffer.v[col + 1]]; ++buffer.h[2][buffer.v[col + 2]]; ++buffer.h[3][buffer.v[col + 3]]; } for(; col < width; ++col) ++buffer.h[0][buffer.v[col + 0]]; src += stride; } SumHistograms(buffer.h[0], 0, histogram); }
template<bool align> void HistogramMasked(const uint8_t * src, size_t srcStride, size_t width, size_t height, const uint8_t * mask, size_t maskStride, uint8_t index, uint32_t * histogram) { Buffer<uint16_t> buffer(AlignHi(width, A), HISTOGRAM_SIZE + 8); size_t widthAligned4 = Simd::AlignLo(width, 4); size_t widthAlignedA = Simd::AlignLo(width, A); size_t widthAlignedDA = Simd::AlignLo(width, DA); __m256i _index = _mm256_set1_epi8(index); for(size_t row = 0; row < height; ++row) { size_t col = 0; for(; col < widthAlignedDA; col += DA) { MaskSrc<align, true>(src, mask, _index, col, buffer.v); MaskSrc<align, true>(src, mask, _index, col + A, buffer.v); } for(; col < widthAlignedA; col += A) MaskSrc<align, true>(src, mask, _index, col, buffer.v); if(width != widthAlignedA) MaskSrc<false, false>(src, mask, _index, width - A, buffer.v); for(col = 0; col < widthAligned4; col += 4) { ++buffer.h[0][buffer.v[col + 0]]; ++buffer.h[1][buffer.v[col + 1]]; ++buffer.h[2][buffer.v[col + 2]]; ++buffer.h[3][buffer.v[col + 3]]; } for(; col < width; ++col) ++buffer.h[0][buffer.v[col]]; src += srcStride; mask += maskStride; } SumHistograms(buffer.h[0], 8, histogram); }